- Finer-grained asynchronous dispatch in parallax daemon.
- Cleanups and cull of older code.
- Fixes to handle changes in block protocol.
Signed-off-by: andrew.warfield@cl.cam.ac.uk
4209033eUwhDBJ_bxejiv5c6gjXS4A tools/blktap/Makefile
4209033ewLAHdhGrT_2jo3Gb_5bDcA tools/blktap/README
42277b02mYXxgijE7MFeUe9d8eldMw tools/blktap/README-PARALLAX
-4209033eX_Xw94wHaOCtnU9nOAtSJA tools/blktap/blkaio.c
-4209033egwf6LDxM2hbaqi9rRdZy4A tools/blktap/blkaiolib.c
-4209033f9yELLK85Ipo2oKjr3ickgQ tools/blktap/blkaiolib.h
-4209033fL9LcSI6LXrIp5O4axbUBLg tools/blktap/blkcow.c
-4209033fUDlFGZreIyZHdP7h7yfvuQ tools/blktap/blkcowgnbd.c
-4209033fCgZzLeMOwNBFmsp99x58ZQ tools/blktap/blkcowimg.c
-4209033frfXH6oOi9AvRz08PPAndNA tools/blktap/blkcowlib.c
-4209033fhFd_y2go9HgCF395A35xJg tools/blktap/blkcowlib.h
4209033fHgtGpb_K16_xC9CpkjNZLw tools/blktap/blkdump.c
-4209033fm61CZG1RyKDW75V-eTZ9fg tools/blktap/blkgnbd.c
-4209033fVfa-R6MFgGcmsQHTDna4PA tools/blktap/blkgnbdlib.c
-4209033fIgDQbaHwHStHhPEDTtbqsA tools/blktap/blkgnbdlib.h
-4209033figp5JRsKsXY8rw4keRumkg tools/blktap/blkimg.c
-42090340V-8HKGlr00SyJGsE5jXC3A tools/blktap/blkimglib.c
-42090340c7pQbh0Km8zLcEqPd_3zIg tools/blktap/blkimglib.h
42090340_mvZtozMjghPJO0qsjk4NQ tools/blktap/blkint.h
42090340rc2q1wmlGn6HtiJAkqhtNQ tools/blktap/blktaplib.c
42090340C-WkRPT7N3t-8Lzehzogdw tools/blktap/blktaplib.h
-423f270cAbkh2f-DHtT0hmCtFFXVXg tools/blktap/blockstore-tls.c
+428df8fdkg84W8yveE50EbkbTUZgjQ tools/blktap/block-async.c
+428df8feTrgGFZEBMA_dYijy9DNs1g tools/blktap/block-async.h
42277b02WrfP1meTDPv1M5swFq8oHQ tools/blktap/blockstore.c
42277b02P1C0FYj3gqwTZUD8sxKCug tools/blktap/blockstore.h
42371b8aL1JsxAXOd4bBhmZKDyjiJg tools/blktap/blockstored.c
42371b8aD_x3L9MKsXciMNqkuk58eQ tools/blktap/bstest.c
-42090340B3mDvcxvd9ehDHUkg46hvw tools/blktap/libgnbd/Makefile
-42090340ZWkc5Xhf9lpQmDON8HJXww tools/blktap/libgnbd/gnbdtest.c
-42090340ocMiUScJE3OpY7QNunvSbg tools/blktap/libgnbd/libgnbd.c
-42090340G5_F_EeVnPORKB0pTMGGhA tools/blktap/libgnbd/libgnbd.h
423f270cbEKiTMapKnCyqkuwGvgOMA tools/blktap/parallax-threaded.c
423f270cFdXryIcD7HTPUl_Dbk4DAQ tools/blktap/parallax-threaded.h
42277b03930x2TJT3PZlw6o0GERXpw tools/blktap/parallax.c
42277b03XQYq8bujXSz7JAZ8N7j_pA tools/blktap/radix.c
42277b03vZ4-jno_mgKmAcCW3ycRAg tools/blktap/radix.h
+428df8fe5RYONloDWVMkM-CfHfB1vA tools/blktap/requests-async.c
+428df8feWeKJ-9HJb5_rFqdm_xqErg tools/blktap/requests-async.h
42277b03U_wLHL-alMA0bfxGlqldXg tools/blktap/snaplog.c
42277b04Ryya-z662BEx8HnxNN0dGQ tools/blktap/snaplog.h
42277b04LxFjptgZ75Z98DUAso4Prg tools/blktap/vdi.c
PLX_SRCS += vdi.c
PLX_SRCS += radix.c
PLX_SRCS += snaplog.c
+PLX_SRCS += blockstore.c
+PLX_SRCS += block-async.c
PLXT_SRCS := $(PLX_SRCS)
-#PLXT_SRCS += blockstore-tls.c
-PLXT_SRCS += blockstore.c
PLXT_SRCS += parallax-threaded.c
-PLX_SRCS += blockstore.c
VDI_SRCS := $(PLX_SRCS)
+PLX_SRCS += requests-async.c
PLX_SRCS += parallax.c
VDI_TOOLS :=
DEPS = .*.d
OBJS = $(patsubst %.c,%.o,$(SRCS))
+IBINS = blkdump parallax $(VDI_TOOLS)
LIB = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
-all: mk-symlinks blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd $(VDI_TOOLS) parallax parallax-threaded blockstored
+all: mk-symlinks blkdump $(VDI_TOOLS) parallax parallax-threaded blockstored
$(MAKE) $(LIB)
LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse)
$(INSTALL_DIR) -p $(DESTDIR)/usr/include
$(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
$(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
- $(INSTALL_PROG) blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd $(DESTDIR)/$(BLKTAP_INSTALL_DIR)
+ $(INSTALL_PROG) $(IBINS) $(DESTDIR)/$(BLKTAP_INSTALL_DIR)
clean:
- rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd blkaio $(VDI_TOOLS) parallax
+# blockstored is a prerequisite of 'all', so clean removes it as well.
+ rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump $(VDI_TOOLS) parallax parallax-threaded blockstored
rpm: all
rm -rf staging
blkdump: $(LIB)
$(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkdump.c
-blkcowimg: $(LIB) blkcowimg.c blkcowlib.c blkimglib.c
- $(CC) $(CFLAGS) -o blkcowimg -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkcowimg.c blkimglib.c blkcowlib.c
-
-blkcow: $(LIB) blkcow.c blkcowlib.c
- $(CC) $(CFLAGS) -o blkcow -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkcow.c blkcowlib.c
-
-blkimg: $(LIB) blkimg.c blkimglib.c
- $(CC) $(CFLAGS) -o blkimg -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkimg.c blkimglib.c
-
-blkgnbd: $(LIB) blkgnbd.c blkgnbdlib.c
- $(CC) $(CFLAGS) -o blkgnbd -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkgnbd.c blkgnbdlib.c libgnbd/libgnbd.a
-
-blkcowgnbd: $(LIB) blkgnbd.c blkcowlib.c blkgnbdlib.c
- $(CC) $(CFLAGS) -o blkcowgnbd -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkcowgnbd.c blkgnbdlib.c blkcowlib.c libgnbd/libgnbd.a
-
-blkaio: $(LIB) blkaio.c blkaiolib.c
- $(CC) $(CFLAGS) -o blkaio -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkaio.c blkaiolib.c -laio -lpthread
-
parallax: $(LIB) $(PLX_SRCS)
- $(CC) $(CFLAGS) -o parallax -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap -lpthread $(PLX_SRCS) libgnbd/libgnbd.a
+# Libraries are listed after the sources: the linker resolves symbols in
+# command-line order, so -l options must follow the objects that use them.
+ $(CC) $(CFLAGS) -o parallax -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. $(PLX_SRCS) -lblktap -lpthread
parallax-threaded: $(LIB) $(PLXT_SRCS)
- $(CC) $(CFLAGS) -o parallax-threaded -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lpthread -lblktap $(PLXT_SRCS) libgnbd/libgnbd.a
-
-vdi_test: $(LIB) $(VDI_SRCS)
- $(CC) $(CFLAGS) -g3 -o vdi_test -DVDI_STANDALONE -lpthread $(VDI_SRCS)
+# Libraries are listed after the sources: the linker resolves symbols in
+# command-line order, so -l options must follow the objects that use them.
+ $(CC) $(CFLAGS) -o parallax-threaded -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. $(PLXT_SRCS) -lpthread -lblktap
vdi_list: $(LIB) vdi_list.c $(VDI_SRCS)
$(CC) $(CFLAGS) -g3 -o vdi_list vdi_list.c -lpthread $(VDI_SRCS)
-include $(DEPS)
-#Random testing targets. To be removed eventually.
-
-rdx_cmp: $(LIB) rdx_cmp.c $(VDI_SRCS)
- $(CC) $(CFLAGS) -g3 -o rdx_cmp rdx_cmp.c $(VDI_SRCS)
-
-bb-tls: $(LIB) blockstore-benchmark.c
- $(CC) $(CFLAGS) -o bb-tls blockstore-benchmark.c blockstore-tls.c -lpthread
-
-bb-trans: $(LIB) blockstore-benchmark.c
- $(CC) $(CFLAGS) -o bb-trans blockstore-benchmark.c blockstore.c -lpthread
-
-radix-test: $(LIB) radix.c blockstore.c
- $(CC) $(CFLAGS) -g3 -D RADIX_STANDALONE -o radix-test radix.c blockstore-threaded-trans.c
+++ /dev/null
-/* blkaio.c
- *
- * libaio-backed disk.
- */
-
-#include "blktaplib.h"
-#include "blkaiolib.h"
-
-
-int main(int argc, char *argv[])
-{
- aio_init();
-
- blktap_register_ctrl_hook("aio_control", aio_control);
- blktap_register_request_hook("aio_request", aio_request);
- blktap_listen();
-
- return 0;
-}
+++ /dev/null
-/* blkaiolib.c
- *
- * file/device image-backed block device -- using linux libaio.
- *
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- *
- * NOTE: This doesn't work. Grrr.
- */
-
-#define _GNU_SOURCE
-#define __USE_LARGEFILE64
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <string.h>
-#include <db.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/poll.h>
-#include <unistd.h>
-#include <errno.h>
-#include <libaio.h>
-#include <pthread.h>
-#include <time.h>
-#include "blktaplib.h"
-
-//#define TMP_IMAGE_FILE_NAME "/dev/sda1"
-#define TMP_IMAGE_FILE_NAME "fc3.image"
-
-#define MAX_DOMS 1024
-#define MAX_IMGNAME_LEN 255
-#define AMORFS_DEV 61440
-#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */
-#define MAX_SEGMENTS_PER_REQ 11
-#define SECTOR_SHIFT 9
-#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
-
-#if 1
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-#if 1
-#define ASSERT(_p) \
- if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \
- __LINE__, __FILE__); *(int*)0=0; }
-#else
-#define ASSERT(_p) ((void)0)
-#endif
-
-char dbg_page[4096];
-
-typedef struct {
- /* These need to turn into an array/rbtree for multi-disk support. */
- int fd;
- u64 fsid;
- char imgname[MAX_IMGNAME_LEN];
- blkif_vdev_t vdevice;
-} image_t;
-
-/* Note on pending_reqs: I assume all reqs are queued before they start to
- * get filled. so count of 0 is an unused record.
- */
-typedef struct {
- blkif_request_t req;
- int count;
-} pending_req_t;
-
-static pending_req_t pending_list[MAX_REQUESTS];
-image_t *images[MAX_DOMS];
-
-static io_context_t ctx;
-static struct iocb *iocb_free[MAX_AIO_REQS];
-static int iocb_free_count;
-
-/* ---[ Notification mecahnism ]--------------------------------------- */
-
-enum {
- READ = 0,
- WRITE = 1
-};
-
-static int aio_notify[2];
-static volatile int aio_listening = 0;
-
-static struct io_event aio_events[MAX_AIO_REQS];
-static int aio_event_count = 0;
-
-/* this is commented out in libaio.h for some reason. */
-extern int io_queue_wait(io_context_t ctx, struct timespec *timeout);
-
-static void *notifier_thread(void *arg)
-{
- int ret;
- int msg = 0x00feeb00;
-
- printf("Notifier thread started.\n");
- for (;;) {
- //if ((aio_listening) && ((ret = io_queue_wait(ctx, 0)) == 0)) {
- if ((aio_listening) &&
- ((ret = io_getevents(ctx, 1, MAX_AIO_REQS, aio_events, 0)) > 0)) {
- aio_event_count = ret;
- printf("[Notifying! (%d)]\n", aio_event_count);
- aio_listening = 0;
- write(aio_notify[WRITE], &msg, sizeof(msg));
- fsync(aio_notify[WRITE]);
- } else {
- if (aio_listening)
- printf("[io_queue_wait error! %d]\n", errno);
- usleep(1000); /* Not ready to read. */
- }
- }
-}
-
-/* -------------------------------------------------------------------- */
-
-int aio_control(control_msg_t *msg)
-{
- domid_t domid;
- DB *db;
- int ret;
-
- if (msg->type != CMSG_BLKIF_BE)
- {
- printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
- return 0;
- }
-
- switch(msg->subtype)
- {
- case CMSG_BLKIF_BE_CREATE:
- if ( msg->length != sizeof(blkif_be_create_t) )
- goto parse_error;
- printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
- ((blkif_be_create_t *)msg->msg)->domid,
- ((blkif_be_create_t *)msg->msg)->blkif_handle);
- domid = ((blkif_be_create_t *)msg->msg)->domid;
- if (images[domid] != NULL) {
- printf("attempt to connect from an existing dom!\n");
- return 0;
- }
-
- images[domid] = (image_t *)malloc(sizeof(image_t));
- if (images[domid] == NULL) {
- printf("error allocating image record.\n");
- return 0;
- }
-
- images[domid]->fd = -1;
- images[domid]->fsid = 0;
-
- printf("Image connected.\n");
- break;
-
- case CMSG_BLKIF_BE_DESTROY:
- if ( msg->length != sizeof(blkif_be_destroy_t) )
- goto parse_error;
- printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
- ((blkif_be_destroy_t *)msg->msg)->domid,
- ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
-
- domid = ((blkif_be_destroy_t *)msg->msg)->domid;
- if (images[domid] != NULL) {
- if (images[domid]->fd != -1)
- close( images[domid]->fd );
- free( images[domid] );
- images[domid] = NULL;
- }
- break;
- case CMSG_BLKIF_BE_VBD_GROW:
- {
- blkif_be_vbd_grow_t *grow;
-
- if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
- goto parse_error;
- printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
- ((blkif_be_vbd_grow_t *)msg->msg)->domid,
- ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
- ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
- printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
- ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
- ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
- ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
- grow = (blkif_be_vbd_grow_t *)msg->msg;
- domid = grow->domid;
- if (images[domid] == NULL) {
- printf("VBD_GROW on unconnected domain!\n");
- return 0;
- }
-
- if (grow->extent.device != AMORFS_DEV) {
- printf("VBD_GROW on non-amorfs device!\n");
- return 0;
- }
-
- /* TODO: config support for arbitrary image files/modes. */
- sprintf(images[domid]->imgname, TMP_IMAGE_FILE_NAME);
-
- images[domid]->fsid = grow->extent.sector_start;
- images[domid]->vdevice = grow->vdevice;
- images[domid]->fd = open(TMP_IMAGE_FILE_NAME,
- O_RDWR | O_DIRECT | O_LARGEFILE);
- if (images[domid]->fd < 0) {
- printf("Couldn't open image file! %d\n", errno);
- return 0;
- }
-
- printf("Image file opened. (%s)\n", images[domid]->imgname);
- break;
- }
- }
- return 0;
-parse_error:
- printf("Bad control message!\n");
- return 0;
-
-create_failed:
- /* TODO: close the db ref. */
- return 0;
-}
-
-int aio_request(blkif_request_t *req)
-{
- int fd;
- u64 sector;
- char *spage, *dpage;
- int ret, i, idx;
- blkif_response_t *rsp;
- domid_t dom = ID_TO_DOM(req->id);
-
- if ((images[dom] == NULL) || (images[dom]->fd == -1)) {
- printf("Data request for unknown domain!!! %d\n", dom);
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = req->operation;
- rsp->status = BLKIF_RSP_ERROR;
- return BLKTAP_RESPOND;
- }
-
- fd = images[dom]->fd;
-
- switch (req->operation)
- {
- case BLKIF_OP_PROBE:
- {
- struct stat stat;
- vdisk_t *img_info;
-
-
- /* We expect one buffer only. */
- if ( req->nr_segments != 1 )
- goto err;
-
- /* Make sure the buffer is page-sized. */
- if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
- (blkif_last_sect (req->frame_and_sects[0]) != 7) )
- goto err;
-
- /* loop for multiple images would start here. */
-
- ret = fstat(fd, &stat);
- if (ret != 0) {
- printf("Couldn't stat image in PROBE!\n");
- goto err;
- }
-
- img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
- img_info[0].device = images[dom]->vdevice;
- img_info[0].info = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
- img_info[0].capacity = (stat.st_size >> SECTOR_SHIFT);
-
- if (img_info[0].capacity == 0)
- img_info[0].capacity = ((u64)1 << 63); // xend does this too.
-
- DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", img_info[0].device,
- img_info[0].capacity);
-
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = BLKIF_OP_PROBE;
- rsp->status = 1; /* number of disks */
-
- return BLKTAP_RESPOND;
- }
- case BLKIF_OP_WRITE:
- {
- unsigned long size;
- struct iocb *io;
- struct iocb *ioq[MAX_SEGMENTS_PER_REQ];
-
- idx = ID_TO_IDX(req->id);
- ASSERT(pending_list[idx].count == 0);
- memcpy(&pending_list[idx].req, req, sizeof(*req));
- pending_list[idx].count = req->nr_segments;
-
- for (i = 0; i < req->nr_segments; i++) {
-
- sector = req->sector_number + (8*i);
-
- size = blkif_last_sect (req->frame_and_sects[i]) -
- blkif_first_sect(req->frame_and_sects[i]) + 1;
-
- DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n",
- req->sector_number, sector,
- blkif_first_sect(req->frame_and_sects[i]),
- blkif_last_sect (req->frame_and_sects[i]),
- (long)(sector << SECTOR_SHIFT));
-
- spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
- spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
-
- /*convert size and sector to byte offsets */
- size <<= SECTOR_SHIFT;
- sector <<= SECTOR_SHIFT;
-
- io = iocb_free[--iocb_free_count];
- io_prep_pwrite(io, fd, spage, size, sector);
- io->data = (void *)idx;
- ioq[i] = io;
- }
-
- ret = io_submit(ctx, req->nr_segments, ioq);
- if (ret < 0)
- printf("BADNESS: io_submit error! (%d)\n", errno);
-
- pending_list[idx].count = req->nr_segments;
-
- return BLKTAP_STOLEN;
-
- }
- case BLKIF_OP_READ:
- {
- unsigned long size;
- struct iocb *io;
- struct iocb *ioq[MAX_SEGMENTS_PER_REQ];
-
- idx = ID_TO_IDX(req->id);
- ASSERT(pending_list[idx].count == 0);
- memcpy(&pending_list[idx].req, req, sizeof(*req));
- pending_list[idx].count = req->nr_segments;
-
- for (i = 0; i < req->nr_segments; i++) {
-
- sector = req->sector_number + (8*i);
-
- size = blkif_last_sect (req->frame_and_sects[i]) -
- blkif_first_sect(req->frame_and_sects[i]) + 1;
-
- dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
- dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
-
-
- DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) "
- "pos: %15lu dpage: %p\n",
- req->sector_number, sector,
- blkif_first_sect(req->frame_and_sects[i]),
- blkif_last_sect (req->frame_and_sects[i]),
- (long)(sector << SECTOR_SHIFT), dpage);
-
- /*convert size and sector to byte offsets */
- size <<= SECTOR_SHIFT;
- sector <<= SECTOR_SHIFT;
-
- io = iocb_free[--iocb_free_count];
-
- io_prep_pread(io, fd, dpage, size, sector);
- io->data = (void *)idx;
-
- ioq[i] = io;
- }
-
- ret = io_submit(ctx, req->nr_segments, ioq);
- if (ret < 0)
- printf("BADNESS: io_submit error! (%d)\n", errno);
-
-
- return BLKTAP_STOLEN;
-
- }
- }
-
- printf("Unknown block operation!\n");
-err:
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = req->operation;
- rsp->status = BLKIF_RSP_ERROR;
- return BLKTAP_RESPOND;
-}
-
-
-int aio_pollhook(int fd)
-{
- struct io_event *ep;
- int n, ret, idx;
- blkif_request_t *req;
- blkif_response_t *rsp;
-
- DPRINTF("aio_hook(): \n");
-
- for (ep = aio_events; aio_event_count-- > 0; ep++) {
- struct iocb *io = ep->obj;
- idx = (int) ep->data;
-
- if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){
- printf("gnbd returned a bad cookie (%u)!\n", idx);
- break;
- }
-
- if ((int)ep->res < 0) printf("aio request error! (%d,%d)\n",
- (int)ep->res, (int)ep->res2);
-
- pending_list[idx].count--;
- iocb_free[iocb_free_count++] = io;
-
- if (pending_list[idx].count == 0) {
- blkif_request_t tmp = pending_list[idx].req;
- rsp = (blkif_response_t *)&pending_list[idx].req;
- rsp->id = tmp.id;
- rsp->operation = tmp.operation;
- rsp->status = BLKIF_RSP_OKAY;
- blktap_inject_response(rsp);
- }
- }
-
- printf("pollhook done!\n");
-
- read(aio_notify[READ], &idx, sizeof(idx));
- aio_listening = 1;
-
- return 0;
-}
-
-/* the image library terminates the request stream. _resp is a noop. */
-int aio_response(blkif_response_t *rsp)
-{
- return BLKTAP_PASS;
-}
-
-void aio_init(void)
-{
- int i, rc;
- pthread_t p;
-
- for (i = 0; i < MAX_DOMS; i++)
- images[i] = NULL;
-
- for (i = 0; i < MAX_REQUESTS; i++)
- pending_list[i].count = 0;
-
- memset(&ctx, 0, sizeof(ctx));
- rc = io_queue_init(MAX_AIO_REQS, &ctx);
- if (rc != 0) {
- printf("queue_init failed! (%d)\n", rc);
- exit(0);
- }
-
- for (i=0; i<MAX_AIO_REQS; i++) {
- if (!(iocb_free[i] = (struct iocb *)malloc(sizeof(struct iocb)))) {
- printf("error allocating iocb array\n");
- exit(0);
- }
- iocb_free_count = i;
- }
-
- rc = pipe(aio_notify);
- if (rc != 0) {
- printf("pipe failed! (%d)\n", errno);
- exit(0);
- }
-
- rc = pthread_create(&p, NULL, notifier_thread, NULL);
- if (rc != 0) {
- printf("pthread_create failed! (%d)\n", errno);
- exit(0);
- }
-
- aio_listening = 1;
-
- blktap_attach_poll(aio_notify[READ], POLLIN, aio_pollhook);
-}
-
+++ /dev/null
-/* blkaiolib.h
- *
- * aio image-backed block device.
- *
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- */
-
-int aio_control(control_msg_t *msg);
-int aio_request(blkif_request_t *req);
-int aio_response(blkif_response_t *rsp); /* noop */
-void aio_init(void);
+++ /dev/null
-/* blkcow.c
- *
- * copy on write a block device. in a really inefficient way.
- *
- * (c) 2004 Andrew Warfield.
- *
- * This uses whatever backend the tap is attached to as the read-only
- * underlay -- for the moment.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent,
- * the cow plugin uses this to identify a unique overlay.
- */
-
-#include "blktaplib.h"
-#include "blkcowlib.h"
-
-
-int main(int argc, char *argv[])
-{
- cow_init();
-
- blktap_register_ctrl_hook("cow_control", cow_control);
- blktap_register_request_hook("cow_request", cow_request);
- blktap_register_response_hook("cow_response", cow_response);
- blktap_listen();
-
- return 0;
-}
+++ /dev/null
-/* blkcowgnbd.c
- *
- * gnbd-backed cow.
- */
-
-#include "blktaplib.h"
-#include "blkcowlib.h"
-#include "blkgnbdlib.h"
-
-
-int main(int argc, char *argv[])
-{
- cow_init();
- gnbd_init();
-
- blktap_register_ctrl_hook("cow_control", cow_control);
- blktap_register_ctrl_hook("gnbd_control", gnbd_control);
- blktap_register_request_hook("cow_request", cow_request);
- blktap_register_request_hook("gnbd_request", gnbd_request);
- blktap_register_response_hook("cow_response", cow_response);
- blktap_listen();
-
- return 0;
-}
+++ /dev/null
-/* blkcowimg.c
- *
- * file-backed cow.
- */
-
-#include "blktaplib.h"
-#include "blkcowlib.h"
-#include "blkimglib.h"
-
-
-int main(int argc, char *argv[])
-{
- cow_init();
- image_init();
-
- blktap_register_ctrl_hook("cow_control", cow_control);
- blktap_register_ctrl_hook("image_control", image_control);
- blktap_register_request_hook("cow_request", cow_request);
- blktap_register_request_hook("image_request", image_request);
- blktap_register_response_hook("cow_response", cow_response);
- blktap_listen();
-
- return 0;
-}
+++ /dev/null
-/* blkcowlib.c
- *
- * copy on write a block device. in a really inefficient way.
- *
- * (c) 2004 Andrew Warfield.
- *
- * This uses whatever backend the tap is attached to as the read-only
- * underlay -- for the moment.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent,
- * the cow plugin uses this to identify a unique overlay.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <db.h>
-#include "blktaplib.h"
-
-#define MAX_DOMS 1024
-#define MAX_DBNAME_LEN 255
-#define AMORFS_DEV 61440
-#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* Berkeley db has different params for open() after 4.1 */
-#ifndef DB_VERSION_MAJOR
-# define DB_VERSION_MAJOR 1
-#endif /* DB_VERSION_MAJOR */
-#ifndef DB_VERSION_MINOR
-# define DB_VERSION_MINOR 0
-#endif /* DB_VERSION_MINOR */
-
-typedef struct {
- DB *db;
- u64 fsid;
- char dbname[MAX_DBNAME_LEN];
-} cow_t;
-
-cow_t *cows[MAX_DOMS];
-blkif_request_t *reread_list[MAX_REQUESTS];
-
-int cow_control(control_msg_t *msg)
-{
- domid_t domid;
- DB *db;
- int ret;
-
- if (msg->type != CMSG_BLKIF_BE)
- {
- printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
- return 0;
- }
-
- switch(msg->subtype)
- {
- case CMSG_BLKIF_BE_CREATE:
- if ( msg->length != sizeof(blkif_be_create_t) )
- goto parse_error;
- printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
- ((blkif_be_create_t *)msg->msg)->domid,
- ((blkif_be_create_t *)msg->msg)->blkif_handle);
- domid = ((blkif_be_create_t *)msg->msg)->domid;
- if (cows[domid] != NULL) {
- printf("attempt to connect from an existing dom!\n");
- return 0;
- }
-
- cows[domid] = (cow_t *)malloc(sizeof(cow_t));
- if (cows[domid] == NULL) {
- printf("error allocating cow.\n");
- return 0;
- }
-
- cows[domid]->db = NULL;
- cows[domid]->fsid = 0;
-
- printf("COW connected.\n");
- break;
-
- case CMSG_BLKIF_BE_DESTROY:
- if ( msg->length != sizeof(blkif_be_destroy_t) )
- goto parse_error;
- printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
- ((blkif_be_destroy_t *)msg->msg)->domid,
- ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
-
- domid = ((blkif_be_destroy_t *)msg->msg)->domid;
- if (cows[domid] != NULL) {
- if (cows[domid]->db != NULL)
- cows[domid]->db->close(cows[domid]->db, 0);
- free(cows[domid]);
- cows[domid] = NULL;
- }
- break;
- case CMSG_BLKIF_BE_VBD_GROW:
- {
- blkif_be_vbd_grow_t *grow;
-
- if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
- goto parse_error;
- printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
- ((blkif_be_vbd_grow_t *)msg->msg)->domid,
- ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
- ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
- printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
- ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
- ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
- ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
- grow = (blkif_be_vbd_grow_t *)msg->msg;
- domid = grow->domid;
- if (cows[domid] == NULL) {
- printf("VBD_GROW on unconnected domain!\n");
- return 0;
- }
-
- if (grow->extent.device != AMORFS_DEV) {
- printf("VBD_GROW on non-amorfs device!\n");
- return 0;
- }
-
- sprintf(&cows[domid]->dbname[0], "%020llu.db",
- grow->extent.sector_start);
-
- cows[domid]->fsid = grow->extent.sector_start;
-
- if ((ret = db_create(&db, NULL, 0)) != 0) {
- fprintf(stderr, "db_create: %s\n", db_strerror(ret));
- return 0;
- }
-
-
-#if DB_VERSION_MAJOR < 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 1)
-
- if ((ret = db->open( db, cows[domid]->dbname, NULL, DB_BTREE,
- DB_CREATE, 0664)) != 0) {
-
-#else /* DB_VERSION >= 4.1 */
-
- if ((ret = db->open( db, NULL, cows[domid]->dbname, NULL, DB_BTREE,
- DB_CREATE, 0664)) != 0) {
-
-#endif /* DB_VERSION < 4.1 */
-
- db->err(db, ret, "%s", cows[domid]->dbname);
- goto create_failed;
- }
- cows[domid]->db = db;
- printf("Overlay db opened. (%s)\n", cows[domid]->dbname);
- break;
- }
- }
- return 0;
-parse_error:
- printf("Bad control message!\n");
- return 0;
-
-create_failed:
- /* TODO: close the db ref. */
- return 0;
-}
-
-int cow_request(blkif_request_t *req)
-{
- DB *db;
- DBT key, data;
- u64 sector;
- char *spage, *dpage;
- int ret, i, idx;
- blkif_response_t *rsp;
- domid_t dom = ID_TO_DOM(req->id);
-
- if ((cows[dom] == NULL) || (cows[dom]->db == NULL)) {
- printf("Data request for unknown domain!!! %d\n", dom);
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = req->operation;
- rsp->status = BLKIF_RSP_ERROR;
- return BLKTAP_RESPOND;
- }
-
- db = cows[dom]->db;
-
- switch (req->operation)
- {
- case BLKIF_OP_PROBE:
-/* debug -- delete */
-idx = ID_TO_IDX(req->id);
-reread_list[idx] = (blkif_request_t *)malloc(sizeof(*req));
-memcpy(reread_list[idx], req, sizeof(*req));
- return BLKTAP_PASS;
-
- case BLKIF_OP_WRITE:
- for (i = 0; i < req->nr_segments; i++) {
- memset(&key, 0, sizeof(key));
- memset(&data, 0, sizeof(data));
-
- sector = req->sector_number + (8*i);
- key.data = &sector;
- key.size = sizeof(sector);
-
- spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
- data.data = spage;
- data.size = PAGE_SIZE;
-
-
- DPRINTF("cWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n",
- req->sector_number, sector,
- blkif_first_sect(req->frame_and_sects[i]),
- blkif_last_sect (req->frame_and_sects[i]),
- (long)(sector << 9));
-
- if ((ret = db->put(db, NULL, &key, &data, 0)) == 0)
- DPRINTF("db: %lld: key stored.\n", *((u64 *)key.data));
- else {
- db->err(db, ret, "DB->put");
- goto err;
- }
- }
-
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = BLKIF_OP_WRITE;
- rsp->status = BLKIF_RSP_OKAY;
-
- return BLKTAP_RESPOND;
-
- case BLKIF_OP_READ:
- for (i = 0; i < req->nr_segments; i++) {
- memset(&key, 0, sizeof(key));
- memset(&data, 0, sizeof(data));
-
- sector = req->sector_number + (8*i);
- key.data = &sector;
- key.size = sizeof(sector);
-
- DPRINTF("cREAD: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n",
- req->sector_number, sector,
- blkif_first_sect(req->frame_and_sects[i]),
- blkif_last_sect (req->frame_and_sects[i]),
- (long)(sector << 9));
-
- if ((ret = db->get(db, NULL, &key, &data, 0)) == 0) {
- DPRINTF("db: %llu: key retrieved (req).\n",
- *((u64 *)key.data));
-
- dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
- spage = data.data;
- memcpy(dpage, spage, PAGE_SIZE);
-
- } else if (ret == DB_NOTFOUND) {
- idx = ID_TO_IDX(req->id);
- if (idx > MAX_REQUESTS) {
- printf("Bad index!\n");
- goto err;
- }
- if (reread_list[idx] != NULL) {
- printf("Dupe index!\n");
- goto err;
- }
- reread_list[idx] = (blkif_request_t *)malloc(sizeof(*req));
- memcpy(reread_list[idx], req, sizeof(*req));
- return BLKTAP_PASS;
- } else {
- db->err(db, ret, "DB->get");
- goto err;
- }
- }
-
-
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = BLKIF_OP_READ;
- rsp->status = BLKIF_RSP_OKAY;
- return BLKTAP_RESPOND;
- }
-
- printf("Unknow block operation!\n");
- return BLKTAP_PASS;
-err:
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = req->operation;
- rsp->status = BLKIF_RSP_ERROR;
- return BLKTAP_RESPOND;
-}
-
-int cow_response(blkif_response_t *rsp)
-{
- blkif_request_t *req;
- int i, ret;
- DB *db;
- DBT key, data;
- u64 sector;
- char *spage, *dpage;
- int idx = ID_TO_IDX(rsp->id);
- domid_t dom;
-
- /* don't touch erroring responses. */
- if (rsp->status == BLKIF_RSP_ERROR)
- return BLKTAP_PASS;
-
- if ((rsp->operation == BLKIF_OP_READ) && (reread_list[idx] != NULL))
- {
- req = reread_list[idx];
- dom = ID_TO_DOM(req->id);
-
- if ((cows[dom] == NULL) || (cows[dom]->db == NULL)) {
- printf("Response from unknown domain!!! Very badness! %d\n", dom);
- return BLKTAP_PASS;
- }
-
- db = cows[dom]->db;
-
- for (i = 0; i < req->nr_segments; i++) {
- memset(&key, 0, sizeof(key));
- memset(&data, 0, sizeof(data));
-
- sector = req->sector_number + (8*i);
- key.data = &sector;
- key.size = sizeof(sector);
-
- if ((ret = db->get(db, NULL, &key, &data, 0)) == 0) {
- printf("db: %llu: key retrieved (rsp).\n",
- *((u64 *)key.data));
-
- dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
- spage = data.data;
- memcpy(dpage, spage, PAGE_SIZE);
-
- } else if (ret == DB_NOTFOUND) {
- continue; /* We read this from disk. */
- } else {
- db->err(db, ret, "DB->get");
- goto err;
- }
- }
- free(reread_list[idx]);
- reread_list[idx] = NULL;
- }
-
- if (rsp->operation == BLKIF_OP_PROBE) {
-
- vdisk_t *img_info;
-
- req = reread_list[idx];
- img_info = (vdisk_t *)(char *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
- for (i =0; i < rsp->status; i++)
- printf("PROBE (%d) device: 0x%04x capacity: %llu, info: 0x%04x\n",
- i,
- img_info[0].device,
- img_info[0].capacity,
- img_info[0].info);
- free(reread_list[idx]);
- reread_list[idx] = NULL;
- }
-
-err:
- return BLKTAP_PASS;
-}
-
-void cow_init(void)
-{
- int i;
-
- for (i = 0; i < MAX_DOMS; i++)
- cows[i] = NULL;
-
- for (i = 0; i < MAX_REQUESTS; i++)
- reread_list[MAX_REQUESTS] = NULL;
-}
-
+++ /dev/null
-/* blkcowlib.h
- *
- * copy on write a block device. in a really inefficient way.
- *
- * (c) 2004 Andrew Warfield.
- *
- * public interfaces to the CoW tap.
- *
- */
-
-int cow_control (control_msg_t *msg);
-int cow_request (blkif_request_t *req);
-int cow_response (blkif_response_t *rsp);
-void cow_init (void);
((blkif_be_vbd_destroy_t *)msg->msg)->blkif_handle,
((blkif_be_vbd_destroy_t *)msg->msg)->vdevice);
break;
- case CMSG_BLKIF_BE_VBD_GROW:
- if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
- goto parse_error;
- printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
- ((blkif_be_vbd_grow_t *)msg->msg)->domid,
- ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
- ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
- printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
- ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
- ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
- ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
- break;
default:
goto parse_error;
}
+++ /dev/null
-/* blkgnbd.c
- *
- * gnbd-backed disk.
- */
-
-#include "blktaplib.h"
-#include "blkgnbdlib.h"
-
-
-int main(int argc, char *argv[])
-{
- gnbd_init();
-
- blktap_register_ctrl_hook("gnbd_control", gnbd_control);
- blktap_register_request_hook("gnbd_request", gnbd_request);
- blktap_listen();
-
- return 0;
-}
+++ /dev/null
-/* blkgnbdlib.c
- *
- * gnbd image-backed block device.
- *
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <db.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sys/poll.h>
-#include "blktaplib.h"
-#include "libgnbd/libgnbd.h"
-
-#define GNBD_SERVER "skirmish.cl.cam.ac.uk"
-#define GNBD_CLIENT "pengi-0.xeno.cl.cam.ac.uk"
-#define GNBD_MOUNT "fc2_akw27"
-#define GNBD_PORT 0x38e7
-
-#define MAX_DOMS 1024
-#define MAX_IMGNAME_LEN 255
-#define AMORFS_DEV 61440
-#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */
-#define SECTOR_SHIFT 9
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-#if 1
-#define ASSERT(_p) \
- if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \
- __LINE__, __FILE__); *(int*)0=0; }
-#else
-#define ASSERT(_p) ((void)0)
-#endif
-
-#define GH_DISCONNECTED 0
-#define GH_PROBEWAITING 1
-#define GH_CONNECTED 2
-
-typedef struct {
- /* These need to turn into an array/rbtree for multi-disk support. */
- struct gnbd_handle *gh;
- int gh_state;
- int probe_idx; /* This really needs cleaning up after hotos. */
- int fd;
- u64 fsid;
- char gnbdname[MAX_IMGNAME_LEN];
- blkif_vdev_t vdevice;
-} gnbd_t;
-
-/* Note on pending_reqs: I assume all reqs are queued before they start to
- * get filled. so count of 0 is an unused record.
- */
-typedef struct {
- blkif_request_t req;
- int count;
-} pending_req_t;
-
-static gnbd_t *gnbds[MAX_DOMS];
-static pending_req_t pending_list[MAX_REQUESTS];
-static int pending_count = 0; /* debugging */
-
-
-gnbd_t *get_gnbd_by_fd(int fd)
-{
- /* this is a linear scan for the moment. nees to be cleaned up for
- multi-disk support. */
-
- int i;
-
- for (i=0; i< MAX_DOMS; i++)
- if ((gnbds[i] != NULL) && (gnbds[i]->fd == fd))
- return gnbds[i];
-
- return NULL;
-}
-
-int gnbd_pollhook(int fd);
-
-int gnbd_control(control_msg_t *msg)
-{
- domid_t domid;
- DB *db;
- int ret;
-
- if (msg->type != CMSG_BLKIF_BE)
- {
- printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
- return 0;
- }
-
- switch(msg->subtype)
- {
- case CMSG_BLKIF_BE_CREATE:
- if ( msg->length != sizeof(blkif_be_create_t) )
- goto parse_error;
- printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
- ((blkif_be_create_t *)msg->msg)->domid,
- ((blkif_be_create_t *)msg->msg)->blkif_handle);
- domid = ((blkif_be_create_t *)msg->msg)->domid;
- if (gnbds[domid] != NULL) {
- printf("attempt to connect from an existing dom!\n");
- return 0;
- }
-
- gnbds[domid] = (gnbd_t *)malloc(sizeof(gnbd_t));
- if (gnbds[domid] == NULL) {
- printf("error allocating gnbd record.\n");
- return 0;
- }
-
- gnbds[domid]->gh = NULL;
- gnbds[domid]->fsid = 0;
-
- break;
-
- case CMSG_BLKIF_BE_DESTROY:
- if ( msg->length != sizeof(blkif_be_destroy_t) )
- goto parse_error;
- printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
- ((blkif_be_destroy_t *)msg->msg)->domid,
- ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
-
- domid = ((blkif_be_destroy_t *)msg->msg)->domid;
- if (gnbds[domid] != NULL) {
- if (gnbds[domid]->gh != NULL) {
- blktap_detach_poll(gnbds[domid]->fd);
- free(gnbds[domid]->gh); /* XXX: Need a gnbd close call! */;
- }
- free( gnbds[domid] );
- gnbds[domid] = NULL;
- }
- break;
- case CMSG_BLKIF_BE_VBD_GROW:
- {
- blkif_be_vbd_grow_t *grow;
-
- if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
- goto parse_error;
- printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
- ((blkif_be_vbd_grow_t *)msg->msg)->domid,
- ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
- ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
- printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
- ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
- ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
- ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
- grow = (blkif_be_vbd_grow_t *)msg->msg;
- domid = grow->domid;
- if (gnbds[domid] == NULL) {
- printf("VBD_GROW on unconnected domain!\n");
- return 0;
- }
-
- if (grow->extent.device != AMORFS_DEV) {
- printf("VBD_GROW on non-amorfs device!\n");
- return 0;
- }
-
- /* TODO: config support for arbitrary gnbd files/modes. */
- sprintf(gnbds[domid]->gnbdname, GNBD_MOUNT);
-
- gnbds[domid]->fsid = grow->extent.sector_start;
- gnbds[domid]->vdevice = grow->vdevice;
- gnbds[domid]->gh_state = GH_DISCONNECTED;
- gnbds[domid]->gh = gnbd_setup(GNBD_SERVER, GNBD_PORT,
- gnbds[domid]->gnbdname, GNBD_CLIENT);
- if (gnbds[domid]->gh == NULL) {
- printf("Couldn't connect to gnbd mount!!\n");
- return 0;
- }
- gnbds[domid]->fd = gnbd_fd(gnbds[domid]->gh);
- blktap_attach_poll(gnbds[domid]->fd, POLLIN, gnbd_pollhook);
-
- printf("gnbd mount connected. (%s)\n", gnbds[domid]->gnbdname);
- break;
- }
- }
- return 0;
-parse_error:
- printf("Bad control message!\n");
- return 0;
-
-create_failed:
- /* TODO: close the db ref. */
- return 0;
-}
-
-static int gnbd_blkif_probe(blkif_request_t *req, gnbd_t *gnbd)
-{
- int fd;
- struct stat stat;
- vdisk_t *gnbd_info;
- blkif_response_t *rsp;
-
- /* We expect one buffer only. */
- if ( req->nr_segments != 1 )
- goto err;
-
- /* Make sure the buffer is page-sized. */
- if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
- (blkif_last_sect (req->frame_and_sects[0]) != 7) )
- goto err;
-
- /* loop for multiple gnbds would start here. */
-
- gnbd_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
- gnbd_info[0].device = gnbd->vdevice;
- gnbd_info[0].info = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
- gnbd_info[0].capacity = gnbd_sectors(gnbd->gh);
-
- printf("[SECTORS] %llu", gnbd_info[0].capacity);
-
- //if (gnbd_info[0].capacity == 0)
- // gnbd_info[0].capacity = ((u64)1 << 63); // xend does this too.
-
- DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", gnbd_info[0].device,
- gnbd_info[0].capacity);
-
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = BLKIF_OP_PROBE;
- rsp->status = 1; /* number of disks */
-
- return BLKTAP_RESPOND;
-err:
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = req->operation;
- rsp->status = BLKIF_RSP_ERROR;
- return BLKTAP_RESPOND;
-}
-
-int gnbd_request(blkif_request_t *req)
-{
- struct gnbd_handle *gh;
- u64 sector;
- char *spage, *dpage;
- int ret, i, idx;
- blkif_response_t *rsp;
- domid_t dom = ID_TO_DOM(req->id);
-
- if ((gnbds[dom] == NULL) || (gnbds[dom]->gh == NULL)) {
- printf("Data request for unknown domain!!! %d\n", dom);
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = req->operation;
- rsp->status = BLKIF_RSP_ERROR;
- return BLKTAP_RESPOND;
- }
-
- gh = gnbds[dom]->gh;
-
- switch (req->operation)
- {
- case BLKIF_OP_PROBE:
- {
- printf("PROBE!\n");
- if ( gnbds[dom]->gh_state == GH_PROBEWAITING ) {
- printf("Already have a PROBE outstanding!\n");
- goto err;
- }
-
- if ( gnbds[dom]->gh_state == GH_DISCONNECTED )
- {
- /* need to defer until we are connected. */
- printf("Deferring PROBE!\n");
- idx = ID_TO_IDX(req->id);
- memcpy(&pending_list[idx].req, req, sizeof(*req));
- ASSERT(pending_list[idx].count == 0);
- pending_list[idx].count = 1;
-
- gnbds[dom]->probe_idx = idx;
- gnbds[dom]->gh_state = GH_PROBEWAITING;
-
- return BLKTAP_STOLEN;
- }
-
-
- return gnbd_blkif_probe(req, gnbds[dom]);
- }
- case BLKIF_OP_WRITE:
- {
- unsigned long size;
-
- idx = ID_TO_IDX(req->id);
- ASSERT(pending_list[idx].count == 0);
- memcpy(&pending_list[idx].req, req, sizeof(*req));
- pending_list[idx].count = req->nr_segments;
- pending_count++; /* dbg */
-
- for (i = 0; i < req->nr_segments; i++) {
-
- sector = req->sector_number + (8*i);
-
- size = blkif_last_sect (req->frame_and_sects[i]) -
- blkif_first_sect(req->frame_and_sects[i]) + 1;
-
- DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n",
- req->sector_number, sector,
- blkif_first_sect(req->frame_and_sects[i]),
- blkif_last_sect (req->frame_and_sects[i]),
- (long)(sector << SECTOR_SHIFT));
-
- spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
- spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
-
- ret = gnbd_write(gh, sector, size, spage, (unsigned long)idx);
- if (ret) {
- printf("gnbd error on WRITE\n");
- goto err;
- }
- }
-//printf("[WR] < %lu\n", (unsigned long)idx);
-
- return BLKTAP_STOLEN;
- }
- case BLKIF_OP_READ:
- {
- unsigned long size;
-
- idx = ID_TO_IDX(req->id);
- ASSERT(pending_list[idx].count == 0);
- memcpy(&pending_list[idx].req, req, sizeof(*req));
- pending_list[idx].count = req->nr_segments;
- pending_count++; /* dbg */
-
- for (i = 0; i < req->nr_segments; i++) {
-
- sector = req->sector_number + (8*i);
-
- size = blkif_last_sect (req->frame_and_sects[i]) -
- blkif_first_sect(req->frame_and_sects[i]) + 1;
-
- DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n",
- req->sector_number, sector,
- blkif_first_sect(req->frame_and_sects[i]),
- blkif_last_sect (req->frame_and_sects[i]),
- (long)(sector << SECTOR_SHIFT));
-
- dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
- dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
-
- ret = gnbd_read(gh, sector, size, dpage, (unsigned long)idx);
- if (ret) {
- printf("gnbd error on READ\n");
- goto err;
- }
-
- }
-//printf("[RD] < %lu\n", (unsigned long)idx);
-
- return BLKTAP_STOLEN;
- }
- }
-
- printf("Unknown block operation!\n");
-err:
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = req->operation;
- rsp->status = BLKIF_RSP_ERROR;
- return BLKTAP_RESPOND;
-}
-
-/* the gnbd library terminates the request stream. _resp is a noop. */
-int gnbd_response(blkif_response_t *rsp)
-{
- return BLKTAP_PASS;
-}
-
-int gnbd_pollhook(int fd)
-{
- int err;
- struct gnbd_handle *gh;
- blkif_request_t *req;
- blkif_response_t *rsp;
- unsigned long idx;
-
- gnbd_t *gnbd = get_gnbd_by_fd(fd);
-
- if (gnbd == NULL) {
- printf("GNBD badness: got poll hook on unknown device. (%d)\n", fd);
- return -1;
- }
- gh = gnbd->gh;
- err = gnbd_reply(gh);
- switch (err) {
- case GNBD_LOGIN_DONE:
- if (gnbd->gh_state == GH_PROBEWAITING) {
- req = (blkif_request_t *)&pending_list[gnbd->probe_idx].req;
- printf("[!] Sending deferred PROBE!\n");
- gnbd_blkif_probe(req, gnbd);
- pending_list[gnbd->probe_idx].count = 0;
- rsp = (blkif_response_t *)req;
- blktap_inject_response(rsp);
- }
- gnbd->gh_state = GH_CONNECTED;
- printf("GNBD_LOGIN_DONE (%d)\n", fd);
- break;
-
- case GNBD_REQUEST_DONE: /* switch to idx */
- idx = gnbd_finished_request(gh);
- req = (blkif_request_t *)&pending_list[idx].req;
- if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){
- printf("gnbd returned a bad cookie (%lu)!\n", idx);
- break;
- }
-
- pending_list[idx].count--;
-
- if (pending_list[idx].count == 0) {
- blkif_request_t tmp = *req;
- pending_count--; /* dbg */
- rsp = (blkif_response_t *)req;
- rsp->id = tmp.id;
- rsp->operation = tmp.operation;
- rsp->status = BLKIF_RSP_OKAY;
- blktap_inject_response(rsp);
-/*
-if (rsp->operation == BLKIF_OP_READ) {
-printf("[RD] > %lu (%d pndg)\n", (unsigned long)idx, pending_count);
-} else if (rsp->operation == BLKIF_OP_WRITE) {
-printf("[WR] > %lu (%d pndg)\n", (unsigned long)idx, pending_count);
-} else {
-printf("[??] > %lu (%d pndg)\n", (unsigned long)idx, pending_count);
-}
-*/
- }
- break;
-
- case GNBD_CONTINUE:
- break;
-
- case 0:
- break;
-
- default:
- printf("gnbd_reply error");
- break;
- }
- return 0;
-}
-
-void gnbd_init(void)
-{
- int i;
-
- for (i = 0; i < MAX_DOMS; i++)
- gnbds[i] = NULL;
-
- for (i = 0; i < MAX_REQUESTS; i++)
- pending_list[i].count = 0;
-
- printf("GNBD image plugin initialized\n");
-}
-
+++ /dev/null
-/* blkgnbdlib.h
- *
- * gndb image-backed block device.
- *
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- */
-
-int gnbd_control(control_msg_t *msg);
-int gnbd_request(blkif_request_t *req);
-int gnbd_response(blkif_response_t *rsp); /* noop */
-void gnbd_init(void);
+++ /dev/null
-/* blkimg.c
- *
- * file-backed disk.
- */
-
-#include "blktaplib.h"
-#include "blkimglib.h"
-
-
-int main(int argc, char *argv[])
-{
- image_init();
-
- blktap_register_ctrl_hook("image_control", image_control);
- blktap_register_request_hook("image_request", image_request);
- blktap_listen();
-
- return 0;
-}
+++ /dev/null
-/* blkimglib.c
- *
- * file image-backed block device.
- *
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <db.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <errno.h>
-#include "blktaplib.h"
-
-//#define TMP_IMAGE_FILE_NAME "/dev/sda1"
-#define TMP_IMAGE_FILE_NAME "fc3.image"
-
-#define MAX_DOMS 1024
-#define MAX_IMGNAME_LEN 255
-#define AMORFS_DEV 61440
-#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */
-#define SECTOR_SHIFT 9
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-
-typedef struct {
- /* These need to turn into an array/rbtree for multi-disk support. */
- FILE *img;
- u64 fsid;
- char imgname[MAX_IMGNAME_LEN];
- blkif_vdev_t vdevice;
-} image_t;
-
-image_t *images[MAX_DOMS];
-blkif_request_t *reread_list[MAX_REQUESTS];
-
-int image_control(control_msg_t *msg)
-{
- domid_t domid;
- DB *db;
- int ret;
-
- if (msg->type != CMSG_BLKIF_BE)
- {
- printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
- return 0;
- }
-
- switch(msg->subtype)
- {
- case CMSG_BLKIF_BE_CREATE:
- if ( msg->length != sizeof(blkif_be_create_t) )
- goto parse_error;
- printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
- ((blkif_be_create_t *)msg->msg)->domid,
- ((blkif_be_create_t *)msg->msg)->blkif_handle);
- domid = ((blkif_be_create_t *)msg->msg)->domid;
- if (images[domid] != NULL) {
- printf("attempt to connect from an existing dom!\n");
- return 0;
- }
-
- images[domid] = (image_t *)malloc(sizeof(image_t));
- if (images[domid] == NULL) {
- printf("error allocating image record.\n");
- return 0;
- }
-
- images[domid]->img = NULL;
- images[domid]->fsid = 0;
-
- printf("Image connected.\n");
- break;
-
- case CMSG_BLKIF_BE_DESTROY:
- if ( msg->length != sizeof(blkif_be_destroy_t) )
- goto parse_error;
- printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
- ((blkif_be_destroy_t *)msg->msg)->domid,
- ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
-
- domid = ((blkif_be_destroy_t *)msg->msg)->domid;
- if (images[domid] != NULL) {
- if (images[domid]->img != NULL)
- fclose( images[domid]->img );
- free( images[domid] );
- images[domid] = NULL;
- }
- break;
- case CMSG_BLKIF_BE_VBD_GROW:
- {
- blkif_be_vbd_grow_t *grow;
-
- if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
- goto parse_error;
- printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
- ((blkif_be_vbd_grow_t *)msg->msg)->domid,
- ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
- ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
- printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
- ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
- ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
- ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
- grow = (blkif_be_vbd_grow_t *)msg->msg;
- domid = grow->domid;
- if (images[domid] == NULL) {
- printf("VBD_GROW on unconnected domain!\n");
- return 0;
- }
-
- if (grow->extent.device != AMORFS_DEV) {
- printf("VBD_GROW on non-amorfs device!\n");
- return 0;
- }
-
- /* TODO: config support for arbitrary image files/modes. */
- sprintf(images[domid]->imgname, TMP_IMAGE_FILE_NAME);
-
- images[domid]->fsid = grow->extent.sector_start;
- images[domid]->vdevice = grow->vdevice;
- images[domid]->img = fopen64(TMP_IMAGE_FILE_NAME, "r+");
- if (images[domid]->img == NULL) {
- printf("Couldn't open image file!\n");
- return 0;
- }
-
- printf("Image file opened. (%s)\n", images[domid]->imgname);
- break;
- }
- }
- return 0;
-parse_error:
- printf("Bad control message!\n");
- return 0;
-
-create_failed:
- /* TODO: close the db ref. */
- return 0;
-}
-
-int image_request(blkif_request_t *req)
-{
- FILE *img;
- u64 sector;
- char *spage, *dpage;
- int ret, i, idx;
- blkif_response_t *rsp;
- domid_t dom = ID_TO_DOM(req->id);
-
- if ((images[dom] == NULL) || (images[dom]->img == NULL)) {
- printf("Data request for unknown domain!!! %d\n", dom);
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = req->operation;
- rsp->status = BLKIF_RSP_ERROR;
- return BLKTAP_RESPOND;
- }
-
- img = images[dom]->img;
-
- switch (req->operation)
- {
- case BLKIF_OP_PROBE:
- {
- int fd;
- struct stat stat;
- vdisk_t *img_info;
-
-
- /* We expect one buffer only. */
- if ( req->nr_segments != 1 )
- goto err;
-
- /* Make sure the buffer is page-sized. */
- if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
- (blkif_last_sect (req->frame_and_sects[0]) != 7) )
- goto err;
-
- /* loop for multiple images would start here. */
-
- fd = fileno(img);
- if (fd == -1) {
- printf("Couldn't get image fd in PROBE!\n");
- goto err;
- }
-
- ret = fstat(fd, &stat);
- if (ret != 0) {
- printf("Couldn't stat image in PROBE!\n");
- goto err;
- }
-
- img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
- img_info[0].device = images[dom]->vdevice;
- img_info[0].info = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
- img_info[0].capacity = (stat.st_size >> SECTOR_SHIFT);
-
- if (img_info[0].capacity == 0)
- img_info[0].capacity = ((u64)1 << 63); // xend does this too.
-
- DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", img_info[0].device,
- img_info[0].capacity);
-
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = BLKIF_OP_PROBE;
- rsp->status = 1; /* number of disks */
-
- return BLKTAP_RESPOND;
- }
- case BLKIF_OP_WRITE:
- {
- unsigned long size;
-
- for (i = 0; i < req->nr_segments; i++) {
-
- sector = req->sector_number + (8*i);
-
- size = blkif_last_sect (req->frame_and_sects[i]) -
- blkif_first_sect(req->frame_and_sects[i]) + 1;
-
- ret = fseeko64(img, (off_t)(sector << SECTOR_SHIFT), SEEK_SET);
- if (ret != 0) {
- printf("fseek error on WRITE\n");
- goto err;
- }
-
- DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n",
- req->sector_number, sector,
- blkif_first_sect(req->frame_and_sects[i]),
- blkif_last_sect (req->frame_and_sects[i]),
- (long)(sector << SECTOR_SHIFT));
-
- spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
- spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
- ret = fwrite(spage, size << SECTOR_SHIFT, 1, img);
- if (ret != 1) {
- printf("fwrite error on WRITE (%d)\n", errno);
- goto err;
- }
- }
-
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = BLKIF_OP_WRITE;
- rsp->status = BLKIF_RSP_OKAY;
-
- return BLKTAP_RESPOND;
- }
- case BLKIF_OP_READ:
- {
- unsigned long size;
-
- for (i = 0; i < req->nr_segments; i++) {
-
- sector = req->sector_number + (8*i);
-
- size = blkif_last_sect (req->frame_and_sects[i]) -
- blkif_first_sect(req->frame_and_sects[i]) + 1;
-
- ret = fseeko64(img, (off_t)(sector << SECTOR_SHIFT), SEEK_SET);
- if (ret != 0) {
- printf("fseek error on READ\n");
- goto err;
- }
-
- DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n",
- req->sector_number, sector,
- blkif_first_sect(req->frame_and_sects[i]),
- blkif_last_sect (req->frame_and_sects[i]),
- (long)(sector << SECTOR_SHIFT));
-
- dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
- dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
- ret = fread(dpage, size << SECTOR_SHIFT, 1, img);
- if (ret != 1) {
- printf("fread error on READ\n");
- goto err;
- }
- }
-
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = BLKIF_OP_READ;
- rsp->status = BLKIF_RSP_OKAY;
- return BLKTAP_RESPOND;
- }
- }
-
- printf("Unknow block operation!\n");
-err:
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = req->operation;
- rsp->status = BLKIF_RSP_ERROR;
- return BLKTAP_RESPOND;
-}
-
-/* the image library terminates the request stream. _resp is a noop. */
-int image_response(blkif_response_t *rsp)
-{
- return BLKTAP_PASS;
-}
-
-void image_init(void)
-{
- int i;
-
- for (i = 0; i < MAX_DOMS; i++)
- images[i] = NULL;
-}
-
+++ /dev/null
-/* blkimglib.h
- *
- * file image-backed block device.
- *
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- */
-
-int image_control(control_msg_t *msg);
-int image_request(blkif_request_t *req);
-int image_response(blkif_response_t *rsp); /* noop */
-void image_init(void);
--- /dev/null
+/* block-async.c\r
+ * \r
+ * Asynchronous block wrappers for parallax.\r
+ */\r
+ \r
+ \r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include <string.h>\r
+#include <pthread.h>\r
+#include "block-async.h"\r
+#include "blockstore.h"\r
+#include "vdi.h"\r
+\r
+\r
+#if 0\r
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )\r
+#else\r
+#define DPRINTF(_f, _a...) ((void)0)\r
+#endif\r
+\r
+/* We have a queue of outstanding I/O requests implemented as a \r
+ * circular producer-consumer ring with free-running indexes.\r
+ * To allow reordering, this ring indirects to indexes in an \r
+ * array of request structs (pending_io_reqs).\r
+ * \r
+ * The block_* calls may either add an entry to this ring and return, \r
+ * or satisfy the request immediately and call the callback directly.\r
+ * None of the io calls in parallax should be nested enough to worry \r
+ * about stack problems with this approach.\r
+ */\r
+\r
+/* Arguments for a queued IO_READ: the address handed to readblock(). */\r
+struct read_args {\r
+ u64 addr;\r
+};\r
+\r
+/* Arguments for a queued IO_WRITE: the address and data buffer handed to\r
+ * writeblock(). */\r
+struct write_args {\r
+ u64 addr;\r
+ char *block;\r
+};\r
+\r
+/* Arguments for a queued IO_ALLOC: the data buffer handed to allocblock(). */\r
+struct alloc_args {\r
+ char *block;\r
+};\r
+ \r
+/* One queued request.  `op` selects which member of `u` is meaningful\r
+ * (IO_RWAKE and IO_WWAKE use none of them).  Once the request has been\r
+ * carried out, `cb` is invoked with the result and the caller's `param`. */\r
+struct pending_io_req {\r
+ enum {IO_READ, IO_WRITE, IO_ALLOC, IO_RWAKE, IO_WWAKE} op;\r
+ union {\r
+ struct read_args r;\r
+ struct write_args w;\r
+ struct alloc_args a;\r
+ } u;\r
+ io_cb_t cb;\r
+ void *param;\r
+};\r
+\r
+/* Put a radix_lock into its idle state: initialise the mutex and mark\r
+ * each of the 1024 rows as unlocked (count 0), with no waiters, in the\r
+ * ANY state. */\r
+void radix_lock_init(struct radix_lock *r)\r
+{\r
+    int row = 0;\r
+\r
+    pthread_mutex_init(&r->lock, NULL);\r
+\r
+    while (row < 1024) {\r
+        r->state[row]   = ANY;\r
+        r->waiters[row] = NULL;\r
+        r->lines[row]   = 0;\r
+        row++;\r
+    }\r
+}\r
+\r
+/* Maximum outstanding I/O requests issued asynchronously. */\r
+/* Must be a power of 2: PENDING_IO_MASK reduces counters with a bit-mask. */\r
+#define MAX_PENDING_IO 1024 //1024\r
+\r
+/* How many threads to concurrently issue I/O to the disk. */\r
+#define IO_POOL_SIZE 10 //10\r
+\r
+/* Request storage.  Slots are reached through pending_io_list rather than\r
+ * directly by ring position (see PENDING_IO_ENT). */\r
+static struct pending_io_req pending_io_reqs[MAX_PENDING_IO];\r
+/* Ring of indexes into pending_io_reqs. */\r
+static int pending_io_list[MAX_PENDING_IO];\r
+/* Free-running ring counters, masked on use: io_prod counts requests\r
+ * queued, io_cons requests picked up by a worker, io_free slots released. */\r
+static unsigned long io_prod = 0, io_cons = 0, io_free = 0;\r
+/* Reduce a free-running counter to a ring position. */\r
+#define PENDING_IO_MASK(_x) ((_x) & (MAX_PENDING_IO - 1))\r
+/* Index within pending_io_reqs of the request that _x points to. */\r
+#define PENDING_IO_IDX(_x) ((_x) - pending_io_reqs)\r
+/* Pointer to the request slot that ring position _x currently refers to. */\r
+#define PENDING_IO_ENT(_x) \\r
+ (&pending_io_reqs[pending_io_list[PENDING_IO_MASK(_x)]])\r
+/* False once MAX_PENDING_IO slots are queued or not yet released. */\r
+#define CAN_PRODUCE_PENDING_IO ((io_free + MAX_PENDING_IO) != io_prod)\r
+/* True while a queued request has not yet been picked up. */\r
+#define CAN_CONSUME_PENDING_IO (io_cons != io_prod)\r
+/* Held around every update of the counters and of pending_io_list made by\r
+ * the producers and workers in this file. */\r
+static pthread_mutex_t pending_io_lock = PTHREAD_MUTEX_INITIALIZER;\r
+/* Signalled after a request is queued; workers in io_thread wait on it. */\r
+static pthread_cond_t pending_io_cond = PTHREAD_COND_INITIALIZER;\r
+\r
+/* Seed the indirection ring with the identity mapping, so that ring\r
+ * position n initially refers to pending_io_reqs[n]. */\r
+static void init_pending_io(void)\r
+{\r
+    int slot = MAX_PENDING_IO;\r
+\r
+    /* Fill order is irrelevant: every entry is written exactly once. */\r
+    while (slot-- > 0)\r
+        pending_io_list[slot] = slot;\r
+}\r
+\r
+/* Queue an asynchronous read of the block at `addr`.  The request is\r
+ * placed on the pending ring and one waiter on pending_io_cond is\r
+ * signalled; `cb` is later called with the result and `param`.\r
+ * Asserts that the ring has room. */\r
+void block_read(u64 addr, io_cb_t cb, void *param)\r
+{\r
+    struct pending_io_req *slot;\r
+    unsigned long pos;\r
+\r
+    pthread_mutex_lock(&pending_io_lock);\r
+    assert(CAN_PRODUCE_PENDING_IO);\r
+\r
+    /* Claim the next ring position and fill in its request slot. */\r
+    pos  = io_prod++;\r
+    slot = PENDING_IO_ENT(pos);\r
+    DPRINTF("Produce (R) %lu (%p)\n", pos, slot);\r
+\r
+    slot->param    = param;\r
+    slot->cb       = cb;\r
+    slot->u.r.addr = addr;\r
+    slot->op       = IO_READ;\r
+\r
+    pthread_cond_signal(&pending_io_cond);\r
+    pthread_mutex_unlock(&pending_io_lock);\r
+}\r
+\r
+\r
+/* Queue an asynchronous write of `block` to `addr`.  The request is\r
+ * placed on the pending ring and one waiter on pending_io_cond is\r
+ * signalled; `cb` is later called with the result and `param`.\r
+ * Asserts that the ring has room. */\r
+void block_write(u64 addr, char *block, io_cb_t cb, void *param)\r
+{\r
+    struct pending_io_req *slot;\r
+    unsigned long pos;\r
+\r
+    pthread_mutex_lock(&pending_io_lock);\r
+    assert(CAN_PRODUCE_PENDING_IO);\r
+\r
+    /* Claim the next ring position and fill in its request slot. */\r
+    pos  = io_prod++;\r
+    slot = PENDING_IO_ENT(pos);\r
+    DPRINTF("Produce (W) %lu (%p)\n", pos, slot);\r
+\r
+    slot->param     = param;\r
+    slot->cb        = cb;\r
+    slot->u.w.block = block;\r
+    slot->u.w.addr  = addr;\r
+    slot->op        = IO_WRITE;\r
+\r
+    pthread_cond_signal(&pending_io_cond);\r
+    pthread_mutex_unlock(&pending_io_lock);\r
+}\r
+\r
+\r
+/* Queue an asynchronous allocation for the data in `block`.  The request\r
+ * is placed on the pending ring and one waiter on pending_io_cond is\r
+ * signalled; `cb` is later called with the result and `param`.\r
+ * Asserts that the ring has room. */\r
+void block_alloc(char *block, io_cb_t cb, void *param)\r
+{\r
+    struct pending_io_req *slot;\r
+\r
+    pthread_mutex_lock(&pending_io_lock);\r
+    assert(CAN_PRODUCE_PENDING_IO);\r
+\r
+    /* Claim the next ring position and fill in its request slot. */\r
+    slot = PENDING_IO_ENT(io_prod++);\r
+\r
+    slot->param     = param;\r
+    slot->cb        = cb;\r
+    slot->u.a.block = block;\r
+    slot->op        = IO_ALLOC;\r
+\r
+    pthread_cond_signal(&pending_io_cond);\r
+    pthread_mutex_unlock(&pending_io_lock);\r
+}\r
+\r
+/* Take a read lock on `row` of `r`.\r
+ *\r
+ * If the row is not write-locked (count >= 0) and not in the STOP state,\r
+ * the read count is bumped and `cb` is called immediately, on the caller's\r
+ * stack, with an IO_INT_T result of 0.  Otherwise a radix_wait record is\r
+ * appended to the row's waiter list and the function returns without\r
+ * calling `cb`; wake_waiters() queues the callback later.\r
+ */\r
+void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param)\r
+{\r
+ struct io_ret ret;\r
+ pthread_mutex_lock(&r->lock);\r
+ \r
+ if (( r->lines[row] >= 0 ) && (r->state[row] != STOP)) {\r
+ r->lines[row]++;\r
+ r->state[row] = READ;\r
+ DPRINTF("RLOCK : %3d (row: %d)\n", r->lines[row], row);\r
+ /* Drop the mutex before running the callback. */\r
+ pthread_mutex_unlock(&r->lock);\r
+ ret.type = IO_INT_T;\r
+ ret.u.i = 0;\r
+ cb(ret, param);\r
+ } else {\r
+ struct radix_wait **rwc;\r
+ /* NOTE(review): the malloc result is used without a NULL check. */\r
+ struct radix_wait *rw = \r
+ (struct radix_wait *) malloc (sizeof(struct radix_wait));\r
+ DPRINTF("RLOCK : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);\r
+ rw->type = RLOCK;\r
+ rw->param = param;\r
+ rw->cb = cb;\r
+ rw->next = NULL;\r
+ /* append to waiters list. */\r
+ rwc = &r->waiters[row];\r
+ while (*rwc != NULL) rwc = &(*rwc)->next;\r
+ *rwc = rw;\r
+ pthread_mutex_unlock(&r->lock);\r
+ return;\r
+ }\r
+}\r
+\r
+\r
+/* Take the write lock on `row` of `r`.\r
+ *\r
+ * If the row is idle (state ANY, count 0) it is marked STOP with a count\r
+ * of -1 and `cb` is called immediately, on the caller's stack, with an\r
+ * IO_INT_T result of 0.  Otherwise a radix_wait record is appended to the\r
+ * row's waiter list and the function returns without calling `cb`;\r
+ * wake_waiters() queues the callback later.\r
+ *\r
+ * NOTE(review): queuing a writer here does not change state[row], so\r
+ * block_rlock keeps admitting new readers while the row is in the READ\r
+ * state -- confirm that this is intended.\r
+ */\r
+void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param)\r
+{\r
+ struct io_ret ret;\r
+ pthread_mutex_lock(&r->lock);\r
+ \r
+ /* the second check here is redundant -- just here for debugging now. */\r
+ if ((r->state[row] == ANY) && ( r->lines[row] == 0 )) {\r
+ r->state[row] = STOP;\r
+ r->lines[row] = -1;\r
+ DPRINTF("WLOCK : %3d (row: %d)\n", r->lines[row], row);\r
+ /* Drop the mutex before running the callback. */\r
+ pthread_mutex_unlock(&r->lock);\r
+ ret.type = IO_INT_T;\r
+ ret.u.i = 0;\r
+ cb(ret, param);\r
+ } else {\r
+ struct radix_wait **rwc;\r
+ /* NOTE(review): the malloc result is used without a NULL check. */\r
+ struct radix_wait *rw = \r
+ (struct radix_wait *) malloc (sizeof(struct radix_wait));\r
+ DPRINTF("WLOCK : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);\r
+ rw->type = WLOCK;\r
+ rw->param = param;\r
+ rw->cb = cb;\r
+ rw->next = NULL;\r
+ /* append to waiters list. */\r
+ rwc = &r->waiters[row];\r
+ while (*rwc != NULL) rwc = &(*rwc)->next;\r
+ *rwc = rw;\r
+ pthread_mutex_unlock(&r->lock);\r
+ return;\r
+ }\r
+ \r
+}\r
+\r
+/* Hand the lock on `row` to the waiter(s) at the head of its queue.\r
+ *\r
+ * Called with radix_lock locked and lock count of zero; returns at once if\r
+ * the count is non-zero or nobody is waiting.  If the head waiter is a\r
+ * writer, only that writer is woken and the row becomes write-locked.  If\r
+ * it is a reader, every consecutive reader at the head is woken and the\r
+ * read count is raised once per reader.  Waking means queuing an\r
+ * IO_WWAKE/IO_RWAKE request on the pending ring, so the waiter's callback\r
+ * runs on an I/O thread rather than on this stack.\r
+ *\r
+ * pending_io_lock is taken while r->lock is still held.\r
+ */\r
+static void wake_waiters(struct radix_lock *r, int row)\r
+{\r
+ struct pending_io_req *req;\r
+ struct radix_wait *rw;\r
+ \r
+ DPRINTF("prewake\n");\r
+ if (r->lines[row] != 0) return;\r
+ if (r->waiters[row] == NULL) {DPRINTF("nowaiters!\n");return;} \r
+ \r
+ DPRINTF("wake\n");\r
+ if (r->waiters[row]->type == WLOCK) {\r
+ rw = r->waiters[row];\r
+ pthread_mutex_lock(&pending_io_lock);\r
+ assert(CAN_PRODUCE_PENDING_IO);\r
+\r
+ req = PENDING_IO_ENT(io_prod++);\r
+ DPRINTF("Produce (WWAKE) %lu (%p)\n", io_prod - 1, req);\r
+ req->op = IO_WWAKE;\r
+ req->cb = rw->cb;\r
+ req->param = rw->param;\r
+ r->lines[row] = -1; /* write lock the row. */\r
+ r->state[row] = STOP;\r
+ /* Unlink and release the waiter record now that it has been copied. */\r
+ r->waiters[row] = rw->next;\r
+ free(rw);\r
+ pthread_mutex_unlock(&pending_io_lock);\r
+ } else /* RLOCK */ {\r
+ while ((r->waiters[row] != NULL) && (r->waiters[row]->type == RLOCK)) {\r
+ rw = r->waiters[row];\r
+ pthread_mutex_lock(&pending_io_lock);\r
+ assert(CAN_PRODUCE_PENDING_IO);\r
+ \r
+ req = PENDING_IO_ENT(io_prod++);\r
+ DPRINTF("Produce (RWAKE) %lu (%p)\n", io_prod - 1, req);\r
+ req->op = IO_RWAKE;\r
+ req->cb = rw->cb;\r
+ req->param = rw->param;\r
+ r->lines[row]++; /* read lock the row. */\r
+ r->state[row] = READ; \r
+ r->waiters[row] = rw->next;\r
+ free(rw);\r
+ pthread_mutex_unlock(&pending_io_lock);\r
+ }\r
+ /* Anything left at the head is a writer: block further readers. */\r
+ if (r->waiters[row] != NULL) /* There is a write queued still */\r
+ r->state[row] = STOP;\r
+ } \r
+ \r
+ DPRINTF("wakedone\n");\r
+ DPRINTF("prod: %lu cons: %lu free: %lu\n", io_prod, io_cons, io_free);\r
+ /* Signal once, after all wake requests above have been queued. */\r
+ pthread_mutex_lock(&pending_io_lock);\r
+ pthread_cond_signal(&pending_io_cond);\r
+ pthread_mutex_unlock(&pending_io_lock);\r
+}\r
+\r
+/* Release one read lock on `row` of `r`.\r
+ *\r
+ * The read count is decremented; when it reaches zero the row returns to\r
+ * the ANY state and any queued waiters are woken.  `cb` is then called on\r
+ * the caller's stack, after the mutex has been dropped, with an IO_INT_T\r
+ * result of 0.  Asserts that the row is actually read-locked.\r
+ */\r
+void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param)\r
+{\r
+    struct io_ret ret;\r
+\r
+    /* Give the callback a defined result, as the lock calls do; it was\r
+     * previously passed uninitialised, so IO_INT(ret) could misfire. */\r
+    ret.type = IO_INT_T;\r
+    ret.u.i  = 0;\r
+\r
+    pthread_mutex_lock(&r->lock);\r
+    assert(r->lines[row] > 0); /* try to catch misuse. */\r
+    r->lines[row]--;\r
+    DPRINTF("RUNLOCK: %3d (row: %d)\n", r->lines[row], row);\r
+    if (r->lines[row] == 0) {\r
+        r->state[row] = ANY;\r
+        wake_waiters(r, row);\r
+    }\r
+    pthread_mutex_unlock(&r->lock);\r
+    cb(ret, param);\r
+}\r
+\r
+/* Release the write lock on `row` of `r`.\r
+ *\r
+ * The row returns to the ANY state with a count of zero and any queued\r
+ * waiters are woken.  `cb` is then called on the caller's stack, after the\r
+ * mutex has been dropped, with an IO_INT_T result of 0.  Asserts that the\r
+ * row is actually write-locked.\r
+ */\r
+void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param)\r
+{\r
+    struct io_ret ret;\r
+\r
+    /* Give the callback a defined result, as the lock calls do; it was\r
+     * previously passed uninitialised, so IO_INT(ret) could misfire. */\r
+    ret.type = IO_INT_T;\r
+    ret.u.i  = 0;\r
+\r
+    pthread_mutex_lock(&r->lock);\r
+    assert(r->lines[row] == -1); /* try to catch misuse. */\r
+    r->lines[row] = 0;\r
+    r->state[row] = ANY;\r
+    DPRINTF("WUNLOCK: %3d (row: %d)\n", r->lines[row], row);\r
+    wake_waiters(r, row);\r
+    pthread_mutex_unlock(&r->lock);\r
+    cb(ret, param);\r
+}\r
+\r
+/* consumer calls */\r
+/* Carry out one request taken off the pending ring, release its slot and\r
+ * then run its callback.\r
+ *\r
+ * READ, WRITE and ALLOC call into the blockstore; RWAKE and WWAKE do no\r
+ * I/O and simply deliver an IO_INT_T result of 0 to a deferred lock\r
+ * waiter.  The callback runs without pending_io_lock held.\r
+ */\r
+static void do_next_io_req(struct pending_io_req *req)\r
+{\r
+    struct io_ret ret;\r
+    io_cb_t cb;\r
+    void *param;\r
+    int known_op = 1;\r
+\r
+    switch (req->op) {\r
+    case IO_READ:\r
+        ret.type = IO_BLOCK_T;\r
+        ret.u.b = readblock(req->u.r.addr);\r
+        break;\r
+    case IO_WRITE:\r
+        ret.type = IO_INT_T;\r
+        ret.u.i = writeblock(req->u.w.addr, req->u.w.block);\r
+        DPRINTF("wrote %d at %Lu\n", *(int *)(req->u.w.block), req->u.w.addr);\r
+        break;\r
+    case IO_ALLOC:\r
+        ret.type = IO_ADDR_T;\r
+        ret.u.a = allocblock(req->u.a.block);\r
+        break;\r
+    case IO_RWAKE:\r
+        DPRINTF("WAKE DEFERRED RLOCK!\n");\r
+        ret.type = IO_INT_T;\r
+        ret.u.i = 0;\r
+        break;\r
+    case IO_WWAKE:\r
+        DPRINTF("WAKE DEFERRED WLOCK!\n");\r
+        ret.type = IO_INT_T;\r
+        ret.u.i = 0;\r
+        break;\r
+    default:\r
+        DPRINTF("Unknown IO operation on pending list!\n");\r
+        /* Fall through to the release below so the slot is not lost. */\r
+        known_op = 0;\r
+        break;\r
+    }\r
+\r
+    /* Copy out everything still needed from the slot BEFORE releasing it:\r
+     * once it is back on the free ring a producer may overwrite it. */\r
+    cb = req->cb;\r
+    param = req->param;\r
+\r
+    DPRINTF("freeing idx %d to slot %lu.\n", PENDING_IO_IDX(req), PENDING_IO_MASK(io_free));\r
+    pthread_mutex_lock(&pending_io_lock);\r
+    pending_io_list[PENDING_IO_MASK(io_free++)] = PENDING_IO_IDX(req);\r
+    DPRINTF(" : prod: %lu cons: %lu free: %lu\n", io_prod, io_cons, io_free);\r
+    pthread_mutex_unlock(&pending_io_lock);\r
+\r
+    /* An unknown op has no meaningful result to deliver. */\r
+    if (!known_op)\r
+        return;\r
+\r
+    assert(cb != NULL);\r
+    cb(ret, param);\r
+}\r
+\r
+/* Worker thread body: repeatedly take the next queued request off the\r
+ * pending ring and carry it out.  `param` points to a heap-allocated int\r
+ * holding this thread's id, which is freed here.  Never returns. */\r
+void *io_thread(void *param)\r
+{\r
+    struct pending_io_req *next;\r
+    int tid = *(int *)param;\r
+\r
+    free(param);\r
+    DPRINTF("IOT %2d started.\n", tid);\r
+\r
+    for (;;) {\r
+        pthread_mutex_lock(&pending_io_lock);\r
+\r
+        /* Sleep until a request is queued.  The loop re-tests the\r
+         * predicate with the lock held, so a wakeup with nothing to\r
+         * consume simply waits again. */\r
+        while (io_prod == io_cons)\r
+            pthread_cond_wait(&pending_io_cond, &pending_io_lock);\r
+\r
+        next = PENDING_IO_ENT(io_cons++);\r
+        DPRINTF("IOT %2d has req %04d(%p).\n", tid, PENDING_IO_IDX(next), next);\r
+        DPRINTF(" : prod: %lu cons: %lu free: %lu\n", io_prod, io_cons, io_free);\r
+        pthread_mutex_unlock(&pending_io_lock);\r
+\r
+        /* Run the request without holding the ring lock. */\r
+        do_next_io_req(next);\r
+    }\r
+}\r
+\r
+static pthread_t io_pool[IO_POOL_SIZE];\r
+void start_io_threads(void)\r
+\r
+{ \r
+ int i, tid=0;\r
+ \r
+ for (i=0; i < IO_POOL_SIZE; i++) {\r
+ int ret, *t;\r
+ t = (int *)malloc(sizeof(int));\r
+ *t = tid++;\r
+ ret = pthread_create(&io_pool[i], NULL, io_thread, t);\r
+ if (ret != 0) printf("Error starting thread %d\n", i);\r
+ }\r
+ \r
+}\r
+\r
+/* Set up the asynchronous block layer: initialise the pending-request\r
+ * ring first, then start the I/O worker threads that consume from it. */\r
+void init_block_async(void)\r
+{\r
+ init_pending_io();\r
+ start_io_threads();\r
+}\r
--- /dev/null
+/* block-async.h\r
+ * \r
+ * Asynchronous block wrappers for parallax.\r
+ */\r
+ \r
+#ifndef _BLOCKASYNC_H_\r
+#define _BLOCKASYNC_H_\r
+\r
+#include <assert.h>\r
+#include <xc.h>\r
+#include "vdi.h"\r
+\r
+/* Tagged result passed to an io_cb_t callback.  `type` says which member\r
+ * of `u` is valid; the IO_ADDR/IO_BLOCK/IO_INT accessors below assert the\r
+ * tag before reading it. */\r
+struct io_ret\r
+{\r
+ enum {IO_ADDR_T, IO_BLOCK_T, IO_INT_T} type;\r
+ union {\r
+ u64 a;\r
+ char *b;\r
+ int i;\r
+ } u;\r
+};\r
+\r
+/* Completion callback: receives the operation's result and the opaque\r
+ * 'param' pointer that was supplied when the operation was issued. */\r
+typedef void (*io_cb_t)(struct io_ret r, void *param);\r
+\r
+/* per-vdi lock structures to make sure requests run in a safe order. */\r
+/* One entry in a singly linked list of waiters (see radix_lock.waiters).\r
+ * NOTE(review): presumably a queued rlock/wlock request whose callback is\r
+ * invoked once the lock is granted -- confirm against block-async.c.\r
+ */\r
+struct radix_wait {\r
+ enum {RLOCK, WLOCK} type;   /* kind of lock being waited for */\r
+ io_cb_t cb;                 /* callback stored with this waiter */\r
+ void *param;                /* opaque argument stored alongside cb */\r
+ struct radix_wait *next;    /* next waiter in the list */\r
+};\r
+\r
+/* Lock state with 1024 parallel slots, set up by radix_lock_init().\r
+ * NOTE(review): the three arrays are presumably indexed by the 'row'\r
+ * argument of block_rlock()/block_wlock() and friends -- confirm.\r
+ * NOTE(review): uses pthread_mutex_t but this header does not include\r
+ * <pthread.h> itself; presumably it arrives via another include -- verify.\r
+ */\r
+struct radix_lock {\r
+ pthread_mutex_t lock;                 /* mutex stored with this structure */\r
+ int lines[1024];                      /* per-slot integer; meaning not visible here */\r
+ struct radix_wait *waiters[1024];     /* per-slot list head of radix_wait entries */\r
+ enum {ANY, READ, STOP} state[1024];   /* per-slot state */\r
+};\r
+void radix_lock_init(struct radix_lock *r);\r
+\r
+void block_read(u64 addr, io_cb_t cb, void *param);\r
+void block_write(u64 addr, char *block, io_cb_t cb, void *param);\r
+void block_alloc(char *block, io_cb_t cb, void *param);\r
+void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param);\r
+void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param);\r
+void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param);\r
+void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param);\r
+void init_block_async(void);\r
+\r
+/* IO_ADDR: return the u64 member of r; asserts that r is tagged IO_ADDR_T. */\r
+static inline u64 IO_ADDR(struct io_ret r)\r
+{\r
+ assert(r.type == IO_ADDR_T);\r
+ return r.u.a;\r
+}\r
+\r
+/* IO_BLOCK: return the char* member of r; asserts that r is tagged IO_BLOCK_T. */\r
+static inline char *IO_BLOCK(struct io_ret r)\r
+{\r
+ assert(r.type == IO_BLOCK_T);\r
+ return r.u.b;\r
+}\r
+\r
+/* IO_INT: return the int member of r; asserts that r is tagged IO_INT_T. */\r
+static inline int IO_INT(struct io_ret r)\r
+{\r
+ assert(r.type == IO_INT_T);\r
+ return r.u.i;\r
+}\r
+\r
+\r
+#endif //_BLOCKASYNC_H_\r
+++ /dev/null
-/**************************************************************************
- *
- * blockstore.c
- *
- * Simple block store interface
- *
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <pthread.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include "blockstore.h"
-#include "parallax-threaded.h"
-
-/*static int block_fp = -1;*/
-
-static int fd_list[READ_POOL_SIZE+1];
-
-/**
- * readblock: read a block from disk
- * @id: block id to read
- *
- * @return: pointer to block, NULL on error
- */
-
-void *readblock(u64 id)
-{
- void *block;
- int tid = (int)pthread_getspecific(tid_key);
-
- if (lseek64(fd_list[tid], ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
- printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
- perror("readblock lseek");
- goto err;
- }
- if ((block = malloc(BLOCK_SIZE)) == NULL) {
- perror("readblock malloc");
- goto err;
- }
- if (read(fd_list[tid], block, BLOCK_SIZE) != BLOCK_SIZE) {
- perror("readblock read");
- free(block);
- goto err;
- }
- return block;
-
-err:
- return NULL;
-}
-
-/**
- * writeblock: write an existing block to disk
- * @id: block id
- * @block: pointer to block
- *
- * @return: zero on success, -1 on failure
- */
-int writeblock(u64 id, void *block)
-{
- int tid = (int)pthread_getspecific(tid_key);
-
- if (lseek64(fd_list[tid], ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
- perror("writeblock lseek");
- goto err;
- }
- if (write(fd_list[tid], block, BLOCK_SIZE) < 0) {
- perror("writeblock write");
- goto err;
- }
- return 0;
-
-err:
- return -1;
-}
-
-/**
- * allocblock: write a new block to disk
- * @block: pointer to block
- *
- * @return: new id of block on disk
- */
-
-u64 allocblock(void *block)
-{
- u64 lb;
- off64_t pos;
- int tid = (int)pthread_getspecific(tid_key);
-
- pos = lseek64(fd_list[tid], 0, SEEK_END);
- if (pos == (off64_t)-1) {
- perror("allocblock lseek");
- goto err;
- }
- if (pos % BLOCK_SIZE != 0) {
- fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
- goto err;
- }
- if (write(fd_list[tid], block, BLOCK_SIZE) != BLOCK_SIZE) {
- perror("allocblock write");
- goto err;
- }
- lb = pos / BLOCK_SIZE + 1;
-
- return lb;
-
-err:
- return 0;
-
-}
-
-
-/**
- * newblock: get a new in-memory block set to zeros
- *
- * @return: pointer to new block, NULL on error
- */
-void *newblock()
-{
- void *block = malloc(BLOCK_SIZE);
- if (block == NULL) {
- perror("newblock");
- return NULL;
- }
- memset(block, 0, BLOCK_SIZE);
- return block;
-}
-
-
-/**
- * freeblock: unallocate an in-memory block
- * @id: block id (zero if this is only in-memory)
- * @block: block to be freed
- */
-void freeblock(void *block)
-{
- if (block != NULL)
- free(block);
-}
-
-
-int __init_blockstore(void)
-{
- int i;
-
- for (i=0; i<(READ_POOL_SIZE+1); i++) {
-
- fd_list[i] = open("blockstore.dat",
- O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
- if (fd_list[i] < 0) {
- perror("open");
- return -1;
- }
- }
- return 0;
-}
#include <pthread.h>
#include "parallax-threaded.h"
-#define BLOCKSTORE_REMOTE
+//#define BLOCKSTORE_REMOTE
//#define BSDEBUG
#define RETRY_TIMEOUT 1000000 /* microseconds */
void *readblock(u64 id) {
void *block;
int block_fp;
-
+
+//printf("readblock(%llu)\n", id);
block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644);
if (block_fp < 0) {
void __exit_blockstore(void)
{
int i;
+#ifdef BLOCKSTORE_REMOTE
pthread_mutex_destroy(&ptmutex_recv);
pthread_mutex_destroy(&ptmutex_luid);
pthread_mutex_destroy(&ptmutex_queue);
pthread_mutex_destroy(&(pool_thread[i].ptmutex));
pthread_cond_destroy(&(pool_thread[i].ptcv));
}
+#endif
}
+++ /dev/null
-
-CFLAGS += -Wall -Werror -g
-LDFLAGS += -g
-
-libgnbd.a: libgnbd.o
- $(AR) r $@ $<
-
-gnbdtest: gnbdtest.o libgnbd.a
+++ /dev/null
-
-#include <err.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <sys/poll.h>
-
-#include "libgnbd.h"
-
-#define PRINTF(x) printf x
-#if 0
-#define DFPRINTF(x...) fprintf(stderr, ##x)
-#define DPRINTF(x) DFPRINTF x
-#else
-#define DPRINTF(x)
-#endif
-
-static unsigned char buf1[8 << 9];
-static unsigned char buf2[8 << 9];
-static unsigned char buf3[8 << 9];
-
-int
-main(int argc, char **argv)
-{
- struct gnbd_handle *gh;
- struct pollfd pfd[1];
- int err, tout;
-
- gh = gnbd_setup("panik", 0x38e7, "cl349-nahant-beta2-root1",
- "arcadians.cl.cam.ac.uk");
- if (gh == NULL)
- errx(1, "gnbd_setup");
-
- memset(pfd, 0, sizeof(pfd));
- pfd[0].fd = gnbd_fd(gh);
- pfd[0].events = POLLIN;
-
- while ((tout = poll(pfd, 1, 0)) >= 0) {
- if (tout == 0)
- continue;
- DPRINTF(("event\n"));
- if (pfd[0].revents) {
- err = gnbd_reply(gh);
- pfd[0].events = POLLIN;
- switch (err) {
- case GNBD_LOGIN_DONE:
- DPRINTF(("sectors: %08llu\n",
- gnbd_sectors(gh)));
- err = gnbd_read(gh, 8, 8, buf2, 1);
- if (err)
- warnx("gnbd_read");
- err = gnbd_read(gh, 0, 8, buf1, 0);
- if (err)
- warnx("gnbd_read");
- err = gnbd_read(gh, 16, 8, buf3, 2);
- if (err)
- warnx("gnbd_read");
- break;
- case GNBD_REQUEST_DONE:
- DPRINTF(("request done %ld\n",
- gnbd_finished_request(gh)));
- if (0 && gnbd_finished_request(gh) == 0) {
- write(1, buf1, 8 << 9);
- err = gnbd_write(gh, 0, 8, buf1, 10);
- if (err)
- warnx("gnbd_write");
- }
- break;
- case GNBD_CONTINUE:
- DPRINTF(("continue\n"));
- break;
- case 0:
- break;
- case GNBD_CONTINUE_WRITE:
- DPRINTF(("continue write\n"));
- pfd[0].events |= POLLOUT;
- break;
- default:
- warnx("gnbd_reply error");
- break;
- }
- DPRINTF(("got gnbd reply\n"));
- }
- }
-
- return 0;
-}
+++ /dev/null
-/* libgnbd.c
- *
- * gnbd client library
- *
- * Copyright (c) 2005, Christian Limpach
- */
-
-#include <byteswap.h>
-#include <endian.h>
-#include <err.h>
-#include <errno.h>
-#include <netdb.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <sys/types.h>
-
-#include <stdio.h>
-
-#include "libgnbd.h"
-
-#define PROTOCOL_VERSION 2
-
-#define EXTERN_KILL_GSERV_REQ 5
-#define EXTERN_LOGIN_REQ 6
-
-#define GNBD_REQUEST_MAGIC 0x37a07e00
-#define GNBD_KEEP_ALIVE_MAGIC 0x5b46d8c2
-#define GNBD_REPLY_MAGIC 0x41f09370
-
-enum {
- GNBD_CMD_READ = 0,
- GNBD_CMD_WRITE = 1,
- GNBD_CMD_DISC = 2,
- GNBD_CMD_PING = 3
-};
-
-#if __BYTE_ORDER == __BIG_ENDIAN
-#define htonll(x) (x)
-#define ntohll(x) (x)
-#endif
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-#define htonll(x) bswap_64(x)
-#define ntohll(x) bswap_64(x)
-#endif
-
-#define PRINTF(x) printf x
-#if 0
-#define DFPRINTF(x...) fprintf(stderr, ##x)
-#define DPRINTF(x) DFPRINTF x
-#else
-#define DPRINTF(x)
-#endif
-
-struct gnbd_request {
- struct gnbd_request *gr_next;
- unsigned char *gr_buf;
- ssize_t gr_size;
- ssize_t gr_done;
- unsigned long gr_cookie;
-};
-
-struct gnbd_handle {
- int gh_fd;
- unsigned int gh_flags;
- uint64_t gh_sectors;
- char gh_devname[32];
- char gh_nodename[65];
- struct sockaddr_in gh_sin;
- struct gnbd_request *gh_outstanding_requests;
- struct gnbd_request **gh_outstanding_requests_last;
- struct gnbd_request *gh_incoming_request;
- unsigned long gh_finished_request;
-};
-#define GHF_EXPECT_KILL_GSERV_REPLY 0x0001
-#define GHF_EXPECT_LOGIN_REPLY 0x0002
-#define GHF_INCOMING_REQUEST 0x0004
-
-struct device_req {
- char name[32];
-};
-
-struct node_req {
- char node_name[65];
-};
-
-struct login_req {
- uint64_t timestamp;
- uint16_t version;
- uint8_t pad[6];
- char devname[32];
-};
-
-struct login_reply {
- uint64_t sectors;
- uint16_t version;
- uint8_t err;
- uint8_t pad[5];
-};
-
-struct gnbd_server_request {
- uint32_t magic;
- uint32_t type;
- char handle[8];
- uint64_t from;
- uint32_t len;
-} __attribute__ ((packed));
-
-struct gnbd_server_reply {
- uint32_t magic;
- uint32_t error;
- char handle[8];
-} __attribute__ ((packed));
-
-static int
-read_buf(int fd, void *buf, size_t count, size_t *read_count)
-{
- int err;
-
- err = read(fd, buf, count);
- if (read_count) {
- if (err >= 0)
- *read_count = err;
- } else if (err != count)
- return EINTR; /* xxx */
- return err < 0;
-}
-
-static int
-read_4(int fd, unsigned long *val)
-{
- unsigned long buf;
- int err;
-
- err = read_buf(fd, &buf, sizeof(buf), NULL);
- if (err == 0)
- *val = ntohl(buf);
- return err;
-}
-
-static int
-write_buf(int fd, void *buf, size_t count)
-{
- int err;
-
- err = write(fd, buf, count);
- return err < 0;
-}
-
-static int
-write_4(int fd, unsigned long val)
-{
- unsigned long buf;
- int err;
-
- buf = htonl(val);
- err = write_buf(fd, &buf, sizeof(buf));
- return err;
-}
-
-
-static int
-socket_connect(struct gnbd_handle *gh)
-{
- int err;
-
- if (gh->gh_fd >= 0)
- return 0;
-
- gh->gh_fd = socket(PF_INET, SOCK_STREAM, 0);
- if (gh->gh_fd < 0) {
- warn("socket");
- return gh->gh_fd;
- }
-
- err = connect(gh->gh_fd, (struct sockaddr *)&gh->gh_sin,
- sizeof(gh->gh_sin));
- if (err) {
- warn("connect");
- goto out;
- }
-
- return 0;
- out:
- close (gh->gh_fd);
- gh->gh_fd = -1;
- return err;
-}
-
-static int
-socket_shutdown(struct gnbd_handle *gh)
-{
-
- close (gh->gh_fd);
- gh->gh_fd = -1;
- return 0;
-}
-
-static int
-find_request(struct gnbd_handle *gh, struct gnbd_request *gr)
-{
- struct gnbd_request **tmp;
-
- for (tmp = &gh->gh_outstanding_requests; *tmp;
- tmp = &(*tmp)->gr_next) {
- if (*tmp == gr) {
- *tmp = (*tmp)->gr_next;
- if (*tmp == NULL)
- gh->gh_outstanding_requests_last = tmp;
- return 0;
- }
- }
- return ENOENT;
-}
-
-static int
-kill_gserv(struct gnbd_handle *gh)
-{
- struct device_req dr;
- struct node_req nr;
- int err;
-
- DPRINTF(("gnbd_kill_gserv\n"));
- err = socket_connect(gh);
- if (err) {
- warnx("socket_connect");
- return err;
- }
-
- err = write_4(gh->gh_fd, EXTERN_KILL_GSERV_REQ);
- if (err) {
- warnx("send EXTERN_LOGIN_REQ failed");
- goto out;
- }
-
- strncpy(dr.name, gh->gh_devname, sizeof(dr.name));
- err = write_buf(gh->gh_fd, &dr, sizeof(dr));
- if (err) {
- warnx("send device_req failed");
- goto out;
- }
-
- strncpy(nr.node_name, gh->gh_nodename, sizeof(nr.node_name));
- err = write_buf(gh->gh_fd, &nr, sizeof(nr));
- if (err) {
- warnx("send node_req failed");
- goto out;
- }
-
- gh->gh_flags |= GHF_EXPECT_KILL_GSERV_REPLY;
- DPRINTF(("gnbd_kill_gserv ok\n"));
-
- return 0;
- out:
- socket_shutdown(gh);
- return err;
-}
-
-static int
-login(struct gnbd_handle *gh)
-{
- struct login_req lr;
- struct node_req nr;
- int err;
- uint64_t timestamp;
- struct timeval tv;
-
- DPRINTF(("gnbd_login\n"));
- err = socket_connect(gh);
- if (err) {
- warnx("socket_connect");
- return err;
- }
-
- err = write_4(gh->gh_fd, EXTERN_LOGIN_REQ);
- if (err) {
- warnx("send EXTERN_LOGIN_REQ failed");
- goto out;
- }
-
- err = gettimeofday(&tv, NULL);
- if (err) {
- warnx("gettimeofday");
- goto out;
- }
- timestamp = (uint64_t)tv.tv_sec * 1000000 + tv.tv_usec;
-
- lr.timestamp = htonll(timestamp);
- lr.version = htons(PROTOCOL_VERSION);
- strncpy(lr.devname, gh->gh_devname, sizeof(lr.devname));
- err = write_buf(gh->gh_fd, &lr, sizeof(lr));
- if (err) {
- warnx("send login_req failed");
- goto out;
- }
-
- strncpy(nr.node_name, gh->gh_nodename, sizeof(nr.node_name));
- err = write_buf(gh->gh_fd, &nr, sizeof(nr));
- if (err) {
- warnx("send node_req failed");
- goto out;
- }
-
- gh->gh_flags |= GHF_EXPECT_LOGIN_REPLY;
-
- DPRINTF(("gnbd_login ok\n"));
- return 0;
- out:
- socket_shutdown(gh);
- return err;
-}
-
-static int
-kill_gserv_reply(struct gnbd_handle *gh)
-{
- unsigned long reply;
- int err;
-
- DPRINTF(("read gnbd_kill_gserv_reply\n"));
- err = read_4(gh->gh_fd, &reply);
- if (err) {
- warnx("read kill_gserv_reply failed");
- return err;
- }
-
- if (reply && reply != ENODEV) {
- warnx("kill gserv failed: %s", strerror(reply));
- return reply;
- }
-
- gh->gh_flags &= ~GHF_EXPECT_KILL_GSERV_REPLY;
- socket_shutdown(gh);
-
- err = login(gh);
- if (err)
- warnx("gnbd_login");
-
- return err;
-}
-
-static int
-login_reply(struct gnbd_handle *gh)
-{
- struct login_reply lr;
- int err;
-
- DPRINTF(("read gnbd_login_reply\n"));
- err = read_buf(gh->gh_fd, &lr, sizeof(lr), NULL);
- if (err) {
- warnx("read login_reply failed");
- return err;
- }
-
- if (lr.err) {
- if (lr.version) {
- warnx("gnbd version mismatch %04x != %04x",
- PROTOCOL_VERSION, ntohs(lr.version));
- return EINVAL;
- }
- warnx("login refused: %s", strerror(lr.err));
- return lr.err;
- }
- gh->gh_sectors = ntohll(lr.sectors);
-
- gh->gh_flags &= ~GHF_EXPECT_LOGIN_REPLY;
-
- return GNBD_LOGIN_DONE;
-}
-
-static int
-incoming_request(struct gnbd_handle *gh)
-{
- struct gnbd_request *gr = gh->gh_incoming_request;
- ssize_t done;
- int err;
-
- DPRINTF(("incoming_request: done %d size %d\n", gr->gr_done,
- gr->gr_size));
- err = read_buf(gh->gh_fd, gr->gr_buf + gr->gr_done,
- gr->gr_size - gr->gr_done, &done);
- if (err)
- goto out;
-
- DPRINTF(("incoming_request: got %d\n", done));
- gr->gr_done += done;
- if (gr->gr_done == gr->gr_size) {
- gh->gh_flags &= ~GHF_INCOMING_REQUEST;
- gh->gh_finished_request = gr->gr_cookie;
- free(gr);
- return GNBD_REQUEST_DONE;
- }
-
- return GNBD_CONTINUE;
-
- out:
- gh->gh_flags &= ~GHF_INCOMING_REQUEST;
- gh->gh_finished_request = 0;
- free(gr);
- return err;
-}
-
-
-
-int
-gnbd_close(struct gnbd_handle *gh)
-{
- int err;
- struct gnbd_request **tmp;
-
- for (tmp = &gh->gh_outstanding_requests; *tmp; tmp = &(*tmp)->gr_next)
- free(*tmp);
-
- if (gh->gh_flags & GHF_INCOMING_REQUEST)
- free(gh->gh_incoming_request);
-
- err = close(gh->gh_fd);
- if (err)
- warnx("close");
- free(gh);
-
- return err;
-}
-
-int
-gnbd_fd(struct gnbd_handle *gh)
-{
- return gh->gh_fd;
-}
-
-unsigned long
-gnbd_finished_request(struct gnbd_handle *gh)
-{
- return gh->gh_finished_request;
-}
-
-int
-gnbd_read(struct gnbd_handle *gh, uint64_t sector, ssize_t count,
- unsigned char *buf, unsigned long cookie)
-{
- struct gnbd_server_request gsr;
- struct gnbd_request *gr;
- int err;
-
- gr = malloc(sizeof(struct gnbd_request));
- if (gr == NULL)
- return ENOMEM;
- memset(gr, 0, sizeof(gr));
-
- gr->gr_buf = buf;
- gr->gr_size = count << 9;
- gr->gr_done = 0;
- gr->gr_cookie = cookie;
-
- gsr.magic = htonl(GNBD_REQUEST_MAGIC);
- gsr.type = htonl(GNBD_CMD_READ);
- gsr.from = htonll(sector << 9);
- gsr.len = htonl(gr->gr_size);
- memset(gsr.handle, 0, sizeof(gsr.handle));
- memcpy(gsr.handle, &gr, sizeof(gr));
-
- err = write_buf(gh->gh_fd, &gsr, sizeof(gsr));
- if (err) {
- warnx("write_buf");
- goto out;
- }
-
- *gh->gh_outstanding_requests_last = gr;
- gh->gh_outstanding_requests_last = &gr->gr_next;
-
- return 0;
-
- out:
- free(gr);
- return err;
-}
-
-int
-gnbd_write(struct gnbd_handle *gh, uint64_t sector, ssize_t count,
- unsigned char *buf, unsigned long cookie)
-{
- struct gnbd_server_request gsr;
- struct gnbd_request *gr;
- int err;
-
- gr = malloc(sizeof(struct gnbd_request));
- if (gr == NULL)
- return ENOMEM;
- memset(gr, 0, sizeof(gr));
-
- gr->gr_buf = buf;
- gr->gr_size = count << 9;
- gr->gr_done = 0;
- gr->gr_cookie = cookie;
-
- gsr.magic = htonl(GNBD_REQUEST_MAGIC);
- gsr.type = htonl(GNBD_CMD_WRITE);
- gsr.from = htonll(sector << 9);
- gsr.len = htonl(gr->gr_size);
- memset(gsr.handle, 0, sizeof(gsr.handle));
- memcpy(gsr.handle, &gr, sizeof(gr));
-
- err = write_buf(gh->gh_fd, &gsr, sizeof(gsr));
- if (err) {
- warnx("write_buf");
- goto out;
- }
-
- /* XXX handle non-blocking socket */
- err = write_buf(gh->gh_fd, buf, gr->gr_size);
- if (err) {
- warnx("write_buf");
- goto out;
- }
- gr->gr_done += gr->gr_size;
-
- *gh->gh_outstanding_requests_last = gr;
- gh->gh_outstanding_requests_last = &gr->gr_next;
-
- DPRINTF(("write done\n"));
-
- return 0;
-
- out:
- free(gr);
- return err;
-}
-
-int
-gnbd_reply(struct gnbd_handle *gh)
-{
- struct gnbd_server_reply gsr;
- struct gnbd_request *gr;
- int err;
-
- DPRINTF(("gnbd_reply flags %x\n", gh->gh_flags));
- if ((gh->gh_flags & GHF_EXPECT_KILL_GSERV_REPLY))
- return kill_gserv_reply(gh);
- if ((gh->gh_flags & GHF_EXPECT_LOGIN_REPLY))
- return login_reply(gh);
- if ((gh->gh_flags & GHF_INCOMING_REQUEST))
- return incoming_request(gh);
-
- DPRINTF(("read response\n"));
- err = read_buf(gh->gh_fd, &gsr, sizeof(gsr), NULL);
- if (err) {
- warnx("read gnbd_reply failed");
- return err;
- }
-
- if (ntohl(gsr.error)) {
- warnx("gnbd server reply error: %s", strerror(gsr.error));
- return gsr.error;
- }
-
- switch (ntohl(gsr.magic)) {
- case GNBD_KEEP_ALIVE_MAGIC:
- DPRINTF(("read keep alive magic\n"));
- return GNBD_CONTINUE;
- case GNBD_REPLY_MAGIC:
- DPRINTF(("read reply magic\n"));
- memcpy(&gr, gsr.handle, sizeof(gr));
- err = find_request(gh, gr);
- if (err) {
- warnx("unknown request");
- return err;
- }
- if (gr->gr_done != gr->gr_size) {
- gh->gh_incoming_request = gr;
- gh->gh_flags |= GHF_INCOMING_REQUEST;
- return GNBD_CONTINUE;
- } else {
- gh->gh_finished_request = gr->gr_cookie;
- free(gr);
- return GNBD_REQUEST_DONE;
- }
- default:
- break;
- }
-
- return GNBD_CONTINUE;
-}
-
-uint64_t
-gnbd_sectors(struct gnbd_handle *gh)
-{
-
- return gh->gh_sectors;
-}
-
-struct gnbd_handle *
-gnbd_setup(char *server, unsigned int port, char *devname, char *nodename)
-{
- struct gnbd_handle *gh;
- struct addrinfo *res, *ai;
- int err;
-
- gh = malloc(sizeof(struct gnbd_handle));
- if (gh == NULL)
- return NULL;
- memset(gh, 0, sizeof(gh));
- gh->gh_fd = -1;
- gh->gh_outstanding_requests_last = &gh->gh_outstanding_requests;
-
- strncpy(gh->gh_devname, devname, sizeof(gh->gh_devname));
- strncpy(gh->gh_nodename, nodename, sizeof(gh->gh_nodename));
-
- err = getaddrinfo(server, NULL, NULL, &res);
- if (err) {
- if (err != EAI_SYSTEM)
- warnx("getaddrinfo: %s", gai_strerror(err));
- else
- warn("getaddrinfo: %s", gai_strerror(err));
- goto out;
- }
-
- for (ai = res; ai; ai = ai->ai_next) {
- if (ai->ai_socktype != SOCK_STREAM)
- continue;
- if (ai->ai_family == AF_INET)
- break;
- }
-
- if (ai == NULL)
- goto out;
-
- gh->gh_sin.sin_family = ai->ai_family;
- gh->gh_sin.sin_port = htons(port);
- memcpy(&gh->gh_sin.sin_addr,
- &((struct sockaddr_in *)ai->ai_addr)->sin_addr,
- sizeof(gh->gh_sin.sin_addr));
-
- err = kill_gserv(gh);
- if (err) {
- warnx("gnbd_kill_gserv");
- goto out;
- }
-
- freeaddrinfo(res);
- return gh;
- out:
- free(gh);
- freeaddrinfo(res);
- return NULL;
-}
+++ /dev/null
-/* libgnbd.h
- *
- * gnbd client library
- *
- * Copyright (c) 2005, Christian Limpach
- */
-
-#define GNBD_LOGIN_DONE 0x10001
-#define GNBD_REQUEST_DONE 0x10002
-#define GNBD_CONTINUE 0x10003
-#define GNBD_CONTINUE_WRITE 0x10004
-
-struct gnbd_handle;
-int gnbd_close(struct gnbd_handle *);
-int gnbd_fd(struct gnbd_handle *);
-unsigned long gnbd_finished_request(struct gnbd_handle *);
-int gnbd_kill_gserv(struct gnbd_handle *);
-int gnbd_login(struct gnbd_handle *);
-int gnbd_read(struct gnbd_handle *, uint64_t, ssize_t, unsigned char *,
- unsigned long);
-int gnbd_write(struct gnbd_handle *, uint64_t, ssize_t, unsigned char *,
- unsigned long);
-int gnbd_reply(struct gnbd_handle *);
-uint64_t gnbd_sectors(struct gnbd_handle *);
-struct gnbd_handle *gnbd_setup(char *, unsigned int, char *, char *);
destroy->status = BLKIF_BE_STATUS_OKAY;
}
-void vbd_grow(blkif_be_vbd_grow_t *grow)
+void vbd_create(blkif_be_vbd_create_t *create)
{
blkif_t *blkif;
vdi_t *vdi, **vdip;
- blkif_vdev_t vdevice = grow->vdevice;
+ blkif_vdev_t vdevice = create->vdevice;
- DPRINTF("parallax (vbd_grow): grow=%p\n", grow);
+ DPRINTF("parallax (vbd_create): create=%p\n", create);
- blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
+ blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
if ( blkif == NULL )
{
- DPRINTF("vbd_grow attempted for non-existent blkif (%u,%u)\n",
- grow->domid, grow->blkif_handle);
- grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n",
+ create->domid, create->blkif_handle);
+ create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
return;
}
/* VDI identifier is in grow->extent.sector_start */
- DPRINTF("vbd_grow: grow->extent.sector_start (id) is %llx\n",
- grow->extent.sector_start);
+ DPRINTF("vbd_create: create->dev_handle (id) is %lx\n",
+ (unsigned long)create->dev_handle);
- vdi = vdi_get(grow->extent.sector_start);
+ vdi = vdi_get(create->dev_handle);
if (vdi == NULL)
{
- printf("parallax (vbd_grow): VDI %llx not found.\n",
- grow->extent.sector_start);
- grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
+ printf("parallax (vbd_create): VDI %lx not found.\n",
+ (unsigned long)create->dev_handle);
+ create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
return;
}
*vdip = vdi;
DPRINTF("vbd_grow: happy return!\n");
- grow->status = BLKIF_BE_STATUS_OKAY;
+ create->status = BLKIF_BE_STATUS_OKAY;
}
int parallax_control(control_msg_t *msg)
blkif_destroy((blkif_be_destroy_t *)msg->msg);
break;
- case CMSG_BLKIF_BE_VBD_GROW:
- if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
+ case CMSG_BLKIF_BE_VBD_CREATE:
+ if ( msg->length != sizeof(blkif_be_vbd_create_t) )
goto parse_error;
- vbd_grow((blkif_be_vbd_grow_t *)msg->msg);
+ vbd_create((blkif_be_vbd_create_t *)msg->msg);
break;
}
return 0;
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <pthread.h>
#include "blktaplib.h"
#include "blockstore.h"
#include "vdi.h"
+#include "block-async.h"
+#include "requests-async.h"
#define PARALLAX_DEV 61440
+#define SECTS_PER_NODE 8
+
#if 0
#define DPRINTF(_f, _a...) printf ( _f , ## _a )
destroy->status = BLKIF_BE_STATUS_OKAY;
}
-void vbd_grow(blkif_be_vbd_grow_t *grow)
+void vbd_create(blkif_be_vbd_create_t *create)
{
blkif_t *blkif;
vdi_t *vdi, **vdip;
- blkif_vdev_t vdevice = grow->vdevice;
+ blkif_vdev_t vdevice = create->vdevice;
- DPRINTF("parallax (vbd_grow): grow=%p\n", grow);
+ DPRINTF("parallax (vbd_create): create=%p\n", create);
- blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
+ blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
if ( blkif == NULL )
{
- DPRINTF("vbd_grow attempted for non-existent blkif (%u,%u)\n",
- grow->domid, grow->blkif_handle);
- grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n",
+ create->domid, create->blkif_handle);
+ create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
return;
}
/* VDI identifier is in grow->extent.sector_start */
- DPRINTF("vbd_grow: grow->extent.sector_start (id) is %llx\n",
- grow->extent.sector_start);
+ DPRINTF("vbd_create: create->dev_handle (id) is %lx\n",
+ (unsigned long)create->dev_handle);
- vdi = vdi_get(grow->extent.sector_start);
+ vdi = vdi_get(create->dev_handle);
if (vdi == NULL)
{
- printf("parallax (vbd_grow): VDI %llx not found.\n",
- grow->extent.sector_start);
- grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
+ printf("parallax (vbd_create): VDI %lx not found.\n",
+ (unsigned long)create->dev_handle);
+ create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
return;
}
*vdip = vdi;
DPRINTF("vbd_grow: happy return!\n");
- grow->status = BLKIF_BE_STATUS_OKAY;
+ create->status = BLKIF_BE_STATUS_OKAY;
}
int parallax_control(control_msg_t *msg)
blkif_destroy((blkif_be_destroy_t *)msg->msg);
break;
- case CMSG_BLKIF_BE_VBD_GROW:
- if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
+ case CMSG_BLKIF_BE_VBD_CREATE:
+ if ( msg->length != sizeof(blkif_be_vbd_create_t) )
goto parse_error;
- vbd_grow((blkif_be_vbd_grow_t *)msg->msg);
+ vbd_create((blkif_be_vbd_create_t *)msg->msg);
break;
}
return 0;
img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
img_info[nr_vdis].device = vdi->vdevice;
img_info[nr_vdis].info = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
- /* The -2 here accounts for the LSB in the radix tree */
+ /* The -1 here accounts for the LSB in the radix tree */
img_info[nr_vdis].capacity =
- ((1LL << (VDI_HEIGHT-2)) >> SECTOR_SHIFT);
+ ((1LL << (VDI_HEIGHT-1)) * SECTS_PER_NODE);
nr_vdis++;
vdi = vdi->next;
}
return BLKTAP_RESPOND;
}
+/* Bookkeeping for one guest request whose segments complete asynchronously. */
+typedef struct {
+ blkif_request_t *req;      /* the request; reused in place as the response */
+ int count;                 /* segments still outstanding; set to nr_segments */
+ int error;                 /* number of segment callbacks that saw a failure */
+ pthread_mutex_t mutex;     /* held while count is decremented and checked */
+} pending_t;
+
+/* Table of pending records; parallax_read/parallax_write pick the slot
+ * with ID_TO_IDX(req->id). */
+#define MAX_REQUESTS 64
+pending_t pending_list[MAX_REQUESTS];
+
+/* Per-segment context passed to read_cb/write_cb.  Allocated with malloc()
+ * by the issuing function and freed by the callback. */
+struct cb_param {
+ pending_t *pent;   /* pending record of the owning request */
+ int segment;       /* index of this segment within the request */
+ u64 sector;        /* sector number of this segment */
+ u64 vblock; /* for debug printing -- can be removed. */
+};
+
+/*
+ * read_cb: completion callback for one segment of a guest read.
+ *
+ * @r:        result of the block read; IO_BLOCK(r) is the block data, or
+ *            NULL on failure.
+ * @in_param: the struct cb_param allocated by parallax_read; freed here.
+ *
+ * Copies the requested sectors from the block into the guest page for this
+ * segment, frees the block, and decrements the request's outstanding count.
+ * The callback that brings the count to zero builds the response in place
+ * over the request and injects it.
+ */
+static void read_cb(struct io_ret r, void *in_param)
+{
+    struct cb_param *param = (struct cb_param *)in_param;
+    pending_t *p = param->pent;
+    int segment = param->segment;
+    blkif_request_t *req = p->req;
+    unsigned long size, offset, start;
+    char *dpage, *spage;
+    int failed = 0;
+
+    spage = IO_BLOCK(r);
+    if (spage == NULL) { failed = 1; goto finish; }
+    dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), segment);
+
+    /* Calculate read size and offset within the read block. */
+
+    offset = (param->sector << SECTOR_SHIFT) % BLOCK_SIZE;
+    size = ( blkif_last_sect (req->frame_and_sects[segment]) -
+             blkif_first_sect(req->frame_and_sects[segment]) + 1
+           ) << SECTOR_SHIFT;
+    start = blkif_first_sect(req->frame_and_sects[segment])
+            << SECTOR_SHIFT;
+
+    DPRINTF("ParallaxRead: sect: %lld (%ld,%ld), "
+            "vblock %llx, "
+            "size %lx\n",
+            param->sector, blkif_first_sect(p->req->frame_and_sects[segment]),
+            blkif_last_sect (p->req->frame_and_sects[segment]),
+            param->vblock, size);
+
+    memcpy(dpage + start, spage + offset, size);
+    freeblock(spage);
+
+    /* Done the read. Now update the pending record. */
+ finish:
+    pthread_mutex_lock(&p->mutex);
+
+    /* p->error is shared by the callbacks of every segment of this request,
+     * which may run on different I/O threads, so it is only touched while
+     * the mutex is held. */
+    if (failed) p->error++;
+    p->count--;
+
+    if (p->count == 0) {
+        blkif_response_t *rsp;
+
+        /* The response is built in place over the request. */
+        rsp = (blkif_response_t *)req;
+        rsp->id = req->id;
+        rsp->operation = BLKIF_OP_READ;
+        if (p->error == 0) {
+            rsp->status = BLKIF_RSP_OKAY;
+        } else {
+            rsp->status = BLKIF_RSP_ERROR;
+        }
+        blktap_inject_response(rsp);
+    }
+
+    pthread_mutex_unlock(&p->mutex);
+
+    free(param); /* TODO: replace with cached alloc/dealloc */
+}
+
int parallax_read(blkif_request_t *req, blkif_t *blkif)
{
blkif_response_t *rsp;
- unsigned long size, offset, start;
- u64 sector;
u64 vblock, gblock;
vdi_t *vdi;
+ u64 sector;
int i;
char *dpage, *spage;
+ pending_t *pent;
vdi = blkif_get_vdi(blkif, req->device);
if ( vdi == NULL )
goto err;
+
+ pent = &pending_list[ID_TO_IDX(req->id)];
+ pent->count = req->nr_segments;
+ /* Slots in pending_list are reused; clear the error count so a failure
+  * left over from an earlier request does not fail this one. */
+ pent->error = 0;
+ pent->req = req;
+ pthread_mutex_init(&pent->mutex, NULL);
for (i = 0; i < req->nr_segments; i++) {
-
- dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-
- /* Round the requested segment to a block address. */
-
- sector = req->sector_number + (8*i);
- vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
-
- /* Get that block from the store. */
-
- gblock = vdi_lookup_block(vdi, vblock, NULL);
-
- /* Calculate read size and offset within the read block. */
-
- offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
- size = ( blkif_last_sect (req->frame_and_sects[i]) -
- blkif_first_sect(req->frame_and_sects[i]) + 1
- ) << SECTOR_SHIFT;
- start = blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
-
- /* If the block does not exist in the store, return zeros. */
- /* Otherwise, copy that region to the guest page. */
-
- DPRINTF("ParallaxRead: sect: %lld (%ld,%ld), "
- "vblock %llx, gblock %llx, "
- "size %lx\n",
- sector, blkif_first_sect(req->frame_and_sects[i]),
- blkif_last_sect (req->frame_and_sects[i]),
- vblock, gblock, size);
-
- if ( gblock == 0 ) {
-
- memset(dpage + start, '\0', size);
-
- } else {
-
- spage = readblock(gblock);
-
- if (spage == NULL) {
- printf("Error reading gblock from store: %Ld\n", gblock);
- goto err;
- }
-
- memcpy(dpage + start, spage + offset, size);
-
- freeblock(spage);
- }
-
- }
+ pthread_t tid;
+ int ret;
+ struct cb_param *p;
+
+ /* Round the requested segment to a block address. */
+ sector = req->sector_number + (8*i);
+ vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
+
+ /* TODO: Replace this call to malloc with a cached allocation */
+ p = (struct cb_param *)malloc(sizeof(struct cb_param));
+ p->pent = pent;
+ p->sector = sector;
+ p->segment = i;
+ p->vblock = vblock; /* dbg */
+
+ /* Get that block from the store. */
+ async_read(vdi, vblock, read_cb, (void *)p);
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = BLKIF_OP_READ;
- rsp->status = BLKIF_RSP_OKAY;
+ }
+
+ return BLKTAP_STOLEN;
- return BLKTAP_RESPOND;
err:
rsp = (blkif_response_t *)req;
rsp->id = req->id;
return BLKTAP_RESPOND;
}
+/*
+ * write_cb: completion callback for one segment of a guest write.
+ *
+ * @r:        result of the block write; IO_INT(r) < 0 indicates failure.
+ * @in_param: the struct cb_param allocated by parallax_write; freed here.
+ *
+ * Decrements the request's outstanding count; the callback that brings it
+ * to zero builds the response in place over the request and injects it.
+ */
+static void write_cb(struct io_ret r, void *in_param)
+{
+    struct cb_param *param = (struct cb_param *)in_param;
+    pending_t *p = param->pent;
+    blkif_request_t *req = p->req;
+    /* catch errors from the block code. */
+    int failed = (IO_INT(r) < 0);
+
+    pthread_mutex_lock(&p->mutex);
+
+    /* p->error is shared by the callbacks of every segment of this request,
+     * which may run on different I/O threads, so it is only touched while
+     * the mutex is held. */
+    if (failed) p->error++;
+    p->count--;
+
+    if (p->count == 0) {
+        blkif_response_t *rsp;
+
+        /* The response is built in place over the request. */
+        rsp = (blkif_response_t *)req;
+        rsp->id = req->id;
+        rsp->operation = BLKIF_OP_WRITE;
+        if (p->error == 0) {
+            rsp->status = BLKIF_RSP_OKAY;
+        } else {
+            rsp->status = BLKIF_RSP_ERROR;
+        }
+        blktap_inject_response(rsp);
+    }
+
+    pthread_mutex_unlock(&p->mutex);
+
+    free(param); /* TODO: replace with cached alloc/dealloc */
+}
+
int parallax_write(blkif_request_t *req, blkif_t *blkif)
{
blkif_response_t *rsp;
char *spage;
unsigned long size, offset, start;
vdi_t *vdi;
+ pending_t *pent;
vdi = blkif_get_vdi(blkif, req->device);
if ( vdi == NULL )
goto err;
+
+ pent = &pending_list[ID_TO_IDX(req->id)];
+ pent->count = req->nr_segments;
+ /* Slots in pending_list are reused; clear the error count so a failure
+  * left over from an earlier request does not fail this one. */
+ pent->error = 0;
+ pent->req = req;
+ pthread_mutex_init(&pent->mutex, NULL);
for (i = 0; i < req->nr_segments; i++) {
+ struct cb_param *p;
spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
sector = req->sector_number + (8*i);
vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
- /* Get that block from the store. */
-
- gblock = vdi_lookup_block(vdi, vblock, &writable);
-
/* Calculate read size and offset within the read block. */
offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
printf("]\n] STRANGE WRITE!\n]\n");
goto err;
}
-
- if (( gblock == 0 ) || ( writable == 0 )) {
-
- gblock = allocblock(spage);
- vdi_update_block(vdi, vblock, gblock);
-
- } else {
-
- /* write-in-place, no need to change mappings. */
- writeblock(gblock, spage);
-
- }
-
+
+ /* TODO: Replace this call to malloc with a cached allocation */
+ p = (struct cb_param *)malloc(sizeof(struct cb_param));
+ p->pent = pent;
+ p->sector = sector;
+ p->segment = i;
+ p->vblock = vblock; /* dbg */
+
+ /* Issue the write to the store. */
+ async_write(vdi, vblock, spage, write_cb, (void *)p);
}
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = BLKIF_OP_WRITE;
- rsp->status = BLKIF_RSP_OKAY;
+ return BLKTAP_STOLEN;
- return BLKTAP_RESPOND;
err:
rsp = (blkif_response_t *)req;
rsp->id = req->id;
}
+
int main(int argc, char *argv[])
{
DPRINTF("parallax: starting.\n");
__init_blockstore();
DPRINTF("parallax: initialized blockstore...\n");
+ init_block_async();
+ DPRINTF("parallax: initialized async blocks...\n");
__init_vdi();
DPRINTF("parallax: initialized vdi registry etc...\n");
__init_parallax();
DPRINTF("parallax: initialized local stuff..\n");
-
+
blktap_register_ctrl_hook("parallax_control", parallax_control);
blktap_register_request_hook("parallax_request", parallax_request);
DPRINTF("parallax: added ctrl + request hooks, starting listen...\n");
#define DEBUG
*/
-/*
-#define STAGED
-*/
-
-#define ZERO 0LL
-#define ONE 1LL
-#define ONEMASK 0xffffffffffffffeLL
-
-
-typedef u64 *radix_tree_node;
-
-
/* Experimental radix cache. */
static pthread_mutex_t rcache_mutex = PTHREAD_MUTEX_INITIALIZER;
*
* @return: value on success, zero on error
*/
-#ifndef STAGED
u64 lookup(int height, u64 root, u64 key) {
radix_tree_node node;
return ZERO;
}
-#else /* STAGED */
-
-
-/* non-recursive staged lookup, assume height is 35. */
-u64 lookup(int height, u64 root, u64 key) {
- radix_tree_node node;
- u64 mask = ONE;
-
-printf("lookup!\n");
- assert(key >> 35 == 0);
-
- /* the root block may be smaller to ensure all leaves are full */
- height = 27;
-
- /* now carve off equal sized chunks at each step */
-
- /* ROOT: (LEVEL 0) KEYLEN=35*/
- if (getid(root) == ZERO)
- return ZERO;
-
- node = (radix_tree_node) readblock(getid(root));
- if (node == NULL)
- return ZERO;
-
- root = node[(key >> height) & RADIX_TREE_MAP_MASK];
- mask &= root;
- freeblock(node);
-
- if (height == 0)
- return ( root & ONEMASK ) | mask;
-
- height -= RADIX_TREE_MAP_SHIFT; /* == 18 */
-
- /* LEVEL 1: KEYLEN=26*/
- if (getid(root) == ZERO)
- return ZERO;
-
- node = (radix_tree_node) readblock(getid(root));
- if (node == NULL)
- return ZERO;
-
- root = node[(key >> height) & RADIX_TREE_MAP_MASK];
- mask &= root;
- freeblock(node);
-
- if (height == 0)
- return ( root & ONEMASK ) | mask;
-
- height -= RADIX_TREE_MAP_SHIFT; /* == 9 */
-
- /* LEVEL 2: KEYLEN=17*/
- if (getid(root) == ZERO)
- return ZERO;
-
- node = (radix_tree_node) readblock(getid(root));
- if (node == NULL)
- return ZERO;
-
- root = node[(key >> height) & RADIX_TREE_MAP_MASK];
- mask &= root;
- freeblock(node);
-
- if (height == 0)
- return ( root & ONEMASK ) | mask;
-
- height -= RADIX_TREE_MAP_SHIFT; /* == 0 */
-
- /* LEVEL 3: KEYLEN=8*/
- if (getid(root) == ZERO)
- return ZERO;
-
- node = (radix_tree_node) readblock(getid(root));
- if (node == NULL)
- return ZERO;
-
- root = node[(key >> height) & RADIX_TREE_MAP_MASK];
- mask &= root;
- freeblock(node);
-
- // if (height == 0)
- return ( root & ONEMASK ) | mask;
-
-}
-
-#endif
-
/*
* update: set a radix tree entry, doing copy-on-write as necessary
* @height: height in bits of the radix tree
* @returns: (possibly new) root id on success (with LSB=1), 0 on failure
*/
-#ifndef STAGED
-
-
u64 update(int height, u64 root, u64 key, u64 val) {
int offset;
u64 child;
return root;
}
-
-#else /* STAGED */
-
-/* When update is called, state->next points to the thing to call after
- * update is finished. */
-
-struct cb_state_st;
-
-typedef struct {
- /* public stuff */
- u64 val;
- u64 key;
- u64 result;
-
- /* internal state */
- u64 root[4];
- radix_tree_node node[4];
- void (*next)(struct cb_state_st *);
- int err;
-} radix_update_t;
-
-typedef struct cb_state_st{
- void (*next)(struct cb_state_st *); /* Next continuation. */
- union {
- radix_update_t update;
- } radix;
-} cb_state_t;
-
-void s_readblock(cb_state_t *state, u64 id, void **ret)
-{
- *ret = readblock(id);
- state->next(state);
-}
-
-void s_allocblock(cb_state_t *state, void *block, u64 *ret)
-{
- *ret = allocblock(block);
- state->next(state);
-}
-
-void s_writeblock(cb_state_t *state, u64 id, void *block, int *ret)
-{
- *ret = writeblock(id, block);
- state->next(state);
-}
-
-void cb_done(cb_state_t *state)
-{
- printf("*** done ***\n");
-}
-
-/* forward prototypes. */
-void up0(cb_state_t *state);
-void up1(cb_state_t *state);
-void up2(cb_state_t *state);
-void up3(cb_state_t *state);
-void up4(cb_state_t *state);
-void up5(cb_state_t *state);
-void up6(cb_state_t *state);
-void up7(cb_state_t *state);
-void up8(cb_state_t *state);
-void up9(cb_state_t *state);
-void up10(cb_state_t *state);
-void up11(cb_state_t *state);
-void up12(cb_state_t *state);
-
-u64 update(int height, u64 root, u64 key, u64 val)
-{
- cb_state_t state;
- radix_update_t *u = &state.radix.update;
-
- u->val = val;
- u->key = key;
- u->root[0] = root;
- u->root[1] = u->root[2] = u->root[3] = ZERO;
- u->node[0] = u->node[1] = u->node[2] = u->node[3] = NULL;
-
- /* take a copy of the higher-scoped next continuation. */
- u->next = state->next;
-
- /* update start state */
- state->next = up0;
-
- for (;;)
- {
- state->next(state);
- if (state->next == NULL)
- break;
- }
-
- return u->result;
-}
-
-/* c0:*/
-void up0(cb_state_t *state) {
- radix_update_t *u = &state->radix.update;
-
- state->next = up1;
- s_readblock(state, getid(u->root[0]), (void **)&(u->node[0]));
-}
-
-/* c1: */
-void up1(cb_state_t *state) {
- radix_update_t *u = &state->radix.update;
-
- u->root[1] = u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK];
- if (u->root[1] == ZERO) {
- u->node[1] = (radix_tree_node) newblock();
- /* goto next continuation (c2)*/ up2(state);return;
- } else {
- state->next = up2;
- s_readblock(state, getid(u->root[1]), (void **)&(u->node[1]));
- }
-}
-
-/* c2: */
-void up2(cb_state_t *state) {
- radix_update_t *u = &state->radix.update;
-
- if ((u->root[1] != ZERO) && (!iswritable(u->root[1]))) {
- /* need to clone this node */
- radix_tree_node oldnode = u->node[1];
- u->node[1] = cloneblock(u->node[1]);
- freeblock(oldnode);
- u->root[1] = ZERO;
- }
- u->root[2] = u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK];
- if (u->root[2] == ZERO) {
- u->node[2] = (radix_tree_node) newblock();
- /* goto next continuation (c3)*/ up3(state);return;
- } else {
- state->next = up3;
- s_readblock(state, getid(u->root[2]), (void **)&(u->node[2]));
- }
-}
-
-/* c3: */
-void up3(cb_state_t *state) {
- radix_update_t *u = &state->radix.update;
-
- if ((u->root[2] != ZERO) && (!iswritable(u->root[2]))) {
- /* need to clone this node */
- radix_tree_node oldnode = u->node[2];
- u->node[2] = cloneblock(u->node[2]);
- freeblock(oldnode);
- u->root[2] = ZERO;
- }
- u->root[3] = u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK];
- if (u->root[3] == ZERO) {
- u->node[3] = (radix_tree_node) newblock();
- /* goto next continuation (c4)*/ up4(state);return;
- } else {
- state->next = up4;
- s_readblock(state, getid(u->root[3]), (void **)&(u->node[3]));
- }
-}
-
-/* c4: */
-void up4(cb_state_t *state) {
- radix_update_t *u = &state->radix.update;
-
- if ((u->root[3] != ZERO) && (!iswritable(u->root[3]))) {
- /* need to clone this node */
- radix_tree_node oldnode = u->node[3];
- u->node[3] = cloneblock(u->node[3]);
- freeblock(oldnode);
- u->root[3] = ZERO;
- }
-
- if (u->node[3][u->key & RADIX_TREE_MAP_MASK] == u->val){
- /* no change, so we already owned the child */
- /* goto last continuation (c12) */ up12(state);return;
- }
-
- u->node[3][u->key & RADIX_TREE_MAP_MASK] = u->val;
-
- /* new/cloned blocks need to be saved */
- if (u->root[3] == ZERO) {
- /* mark this as an owned block */
- state->next = up5;
- s_allocblock(state, u->node[3], &u->root[3]);
- /* goto continuation (c5) */ return;
- } else {
- state->next = up6;
- s_writeblock(state, getid(u->root[3]), u->node[3], &u->err);
- /* goto continuation (c6) */ return;
- }
-}
-
-/* c5: */
-void up5(cb_state_t *state) {
- radix_update_t *u = &state->radix.update;
-
- if (u->root[3])
- u->root[3] = writable(u->root[3]);
- /* goto continuation (c6) */ up6(state);return;
-}
-
-/* c6: */
-void up6(cb_state_t *state) {
- radix_update_t *u = &state->radix.update;
-
- if (u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK] == u->root[3]){
- /* no change, so we already owned the child */
- /* goto last continuation (c12) */ up12(state);return;
- }
-
- u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK] = u->root[3];
-
- /* new/cloned blocks need to be saved */
- if (u->root[2] == ZERO) {
- /* mark this as an owned block */
- state->next = up7;
- s_allocblock(state, u->node[2], &u->root[2]);
- /* goto continuation (c7) */return;
- } else {
- state->next = up8;
- s_writeblock(state, getid(u->root[2]), u->node[2], &u->err);
- /* goto continuation (c8) */return;
- }
-}
-
-/* c7: */
-void up7(cb_state_t *state) {
- radix_update_t *u = &state->radix.update;
-
- if (u->root[2])
- u->root[2] = writable(u->root[2]);
- /* goto continuation (c8) */ up8(state);return;
-}
-
-/* c8: */
-void up8(cb_state_t *state) {
- radix_update_t *u = &state->radix.update;
-
- if (u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK] == u->root[2]){
- /* no change, so we already owned the child */
- /* goto last continuation (c12) */ up12(state);return;
- }
-
- u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK] = u->root[2];
-
- /* new/cloned blocks need to be saved */
- if (u->root[1] == ZERO) {
- /* mark this as an owned block */
- state->next = up9;
- s_allocblock(state, u->node[1], &u->root[1]);
- /* goto continuation (c9) */return;
- } else {
- state->next = up10;
- s_writeblock(state, getid(u->root[1]), u->node[1], &u->err);
- /* goto continuation (c10) */return;
- }
-}
-
-/* c9: */
-void up9(cb_state_t *state) {
- radix_update_t *u = &state->radix.update;
-
- if (u->root[1])
- u->root[1] = writable(u->root[1]);
- /* goto continuation (c10) */ up10(state);return;
-}
-
-/* c10: */
-void up10(cb_state_t *state) {
- radix_update_t *u = &state->radix.update;
-
- if (u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK] == u->root[1]){
- /* no change, so we already owned the child */
- /* goto last continuation (c12) */ up12(state);return;
- }
-
- u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK] = u->root[1];
-
- /* new/cloned blocks need to be saved */
- if (u->root[0] == ZERO) {
- /* mark this as an owned block */
- state->next = up11;
- s_allocblock(state, u->node[0], &u->root[0]);
- /* goto continuation (c11) */ return;
- } else {
- state->next = up10;
- s_writeblock(state, getid(u->root[0]), u->node[0], &u->err);
- /* goto continuation (c12) */ return;
- }
-}
-
-/* c11: */
-void up11(cb_state_t *state) {
- radix_update_t *u = &state->radix.update;
-
- if (u->root[0])
- u->root[0] = writable(u->root[0]);
- /* goto continuation (c12) */ up12(state);return;
-}
-
-/* c12: */
-void up12(cb_state_t *state) {
- radix_update_t *u = &state->radix.update;
-
- int i;
- for (i=0;i<4;i++)
- if(u->node[i] != NULL) freeblock(u->node[i]);
-
- u->result = u->root[0];
- state->next = u->next;
-
- state->next(state);return;
-}
-
-#endif
-
-
/**
* snapshot: create a snapshot
* @root: old root node
int i, numlinks, ret, total = 0;
radix_tree_node pnode, cnode;
-//printf("proot: %Ld\n", getid(proot));
if (height == 0) {
height = -1; /* terminate recursion */
} else {
#define putid(x) ((x)<<1)
#define writable(x) (((x)<<1)|1LL)
#define iswritable(x) ((x)&1LL)
+/* Constants for the 64-bit tagged radix tree entries used with
+ * writable()/iswritable() above, which keep their flag in the LSB. */
+#define ZERO 0LL
+#define ONE 1LL
+/* Every bit except the LSB.  Spelled as a complement of ONE rather than a
+ * hand-counted hex literal so that no high-order bits can be dropped by a
+ * miscounted digit. */
+#define ONEMASK (~ONE)
+
+#define RADIX_TREE_MAP_SHIFT 9
+#define RADIX_TREE_MAP_MASK 0x1ff
+#define RADIX_TREE_MAP_ENTRIES 512
+
+typedef u64 *radix_tree_node;
+
/*
* main api
--- /dev/null
+/* read.c\r
+ *\r
+ * asynchronous read experiment for parallax.\r
+ */\r
+\r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include <string.h>\r
+#include <assert.h>\r
+#include <pthread.h>\r
+#include "requests-async.h"\r
+#include "vdi.h"\r
+#include "radix.h"\r
+\r
+#define L1_IDX(_a) (((_a) & 0x0000000007fc0000ULL) >> 18)\r
+#define L2_IDX(_a) (((_a) & 0x000000000003fe00ULL) >> 9)\r
+#define L3_IDX(_a) (((_a) & 0x00000000000001ffULL))\r
+\r
+\r
+\r
+//#define STANDALONE\r
+\r
+#if 0\r
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )\r
+#else\r
+#define DPRINTF(_f, _a...) ((void)0)\r
+#endif\r
+\r
+\r
+/* Per-request record for one asynchronous block read or write.  It is\r
+ * allocated by async_read()/async_write(), passed as the opaque parameter\r
+ * to every intermediate callback, and freed just before the caller's\r
+ * callback is invoked. */\r
+struct io_req {\r
+ /* which operation this request performs */\r
+ enum { IO_OP_READ, IO_OP_WRITE } op;\r
+ /* radix root entry copied from vdi->radix_root when the request starts */\r
+ u64 root;\r
+ /* virtual block address; split into indices by L1_IDX/L2_IDX/L3_IDX */\r
+ u64 vaddr;\r
+ /* an enum states value: the step the next callback invocation handles */\r
+ int state;\r
+ /* caller's completion callback and the opaque argument passed to it */\r
+ io_cb_t cb;\r
+ void *param;\r
+ /* vdi->radix_lock; taken and released on row L1_IDX(vaddr) */\r
+ struct radix_lock *lock;\r
+\r
+ /* internal stuff: */\r
+ struct io_ret retval;/* holds the return while we unlock. */\r
+ char *block; /* the block to write */\r
+ /* radix nodes kept during a write, indexed by enum radix_offsets */\r
+ radix_tree_node radix[3];\r
+ /* block ids of those nodes; used as block_write() destinations */\r
+ u64 radix_addr[3];\r
+};\r
+\r
+/*\r
+ * clear_w_bits: mask every entry of a radix node with ONEMASK, clearing\r
+ *               the flag bit that writable() sets.\r
+ *   @node: node holding RADIX_TREE_MAP_ENTRIES entries, or NULL\r
+ *\r
+ * A NULL node (for example the result of a failed block read) is ignored\r
+ * rather than dereferenced.\r
+ */\r
+void clear_w_bits(radix_tree_node node)\r
+{\r
+    int i;\r
+\r
+    if (node == NULL)\r
+        return;\r
+\r
+    for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++)\r
+        node[i] &= ONEMASK;\r
+}\r
+\r
+/* States of the per-request state machines driven by read_cb() and\r
+ * write_cb().  req->state names the step whose completion the next\r
+ * callback invocation handles.  In the write path a trailing 'z' marks\r
+ * states reached when a radix entry was ZERO and a trailing 'f' marks\r
+ * states reached when the entry existed but was not writable.\r
+ * WRITE_L2_L3z and WRITE_L2_L3f are declared but are neither assigned nor\r
+ * handled by the code in this file. */\r
+enum states {\r
+ /* both */\r
+ READ_L1,\r
+ READ_L2,\r
+ READ_L3,\r
+\r
+ /* read */\r
+ READ_LOCKED,\r
+ READ_DATA,\r
+ READ_UNLOCKED,\r
+ RETURN_ZERO,\r
+\r
+ /* write */\r
+ WRITE_LOCKED,\r
+ WRITE_DATA,\r
+ WRITE_UNLOCKED,\r
+ \r
+ /* L3 Zero Path */\r
+ ALLOC_DATA_L3z,\r
+ WRITE_L3_L3z,\r
+ \r
+ /* L3 Fault Path */\r
+ ALLOC_DATA_L3f,\r
+ WRITE_L3_L3f,\r
+ \r
+ /* L2 Zero Path */\r
+ ALLOC_DATA_L2z,\r
+ WRITE_L2_L2z,\r
+ ALLOC_L3_L2z,\r
+ WRITE_L2_L3z,\r
+ \r
+ /* L2 Fault Path */\r
+ READ_L3_L2f,\r
+ ALLOC_DATA_L2f,\r
+ WRITE_L2_L2f,\r
+ ALLOC_L3_L2f,\r
+ WRITE_L2_L3f,\r
+\r
+ /* L1 Zero Path */\r
+ ALLOC_DATA_L1z,\r
+ ALLOC_L3_L1z,\r
+ ALLOC_L2_L1z,\r
+ WRITE_L1_L1z,\r
+\r
+ /* L1 Fault Path */\r
+ READ_L2_L1f,\r
+ READ_L3_L1f,\r
+ ALLOC_DATA_L1f,\r
+ ALLOC_L3_L1f,\r
+ ALLOC_L2_L1f,\r
+ WRITE_L1_L1f,\r
+ \r
+};\r
+\r
+/* Indices into io_req.radix[] and io_req.radix_addr[], one per level of\r
+ * the three-level radix tree walked by write_cb(). */\r
+enum radix_offsets {\r
+ L1 = 0, \r
+ L2 = 1,\r
+ L3 = 2\r
+};\r
+\r
+\r
+static void read_cb(struct io_ret ret, void *param);\r
+static void write_cb(struct io_ret ret, void *param);\r
+\r
+\r
+/*\r
+ * async_read: start an asynchronous read of one block of a VDI.\r
+ *   @vdi:   VDI whose radix_root and radix_lock are used for the lookup\r
+ *   @vaddr: virtual block address to read\r
+ *   @cb:    called with the outcome once the request has finished\r
+ *   @param: opaque argument handed back to @cb\r
+ *\r
+ *   @return: 0 once the read lock has been requested, -1 if the request\r
+ *            record could not be allocated (in which case @cb is never\r
+ *            called)\r
+ */\r
+int async_read(vdi_t *vdi, u64 vaddr, io_cb_t cb, void *param)\r
+{\r
+    struct io_req *req;\r
+\r
+    DPRINTF("async_read\n");\r
+\r
+    req = (struct io_req *)malloc(sizeof (struct io_req));\r
+    /* check the allocation before touching any field of the record */\r
+    if (req == NULL) {perror("req was NULL in async_read"); return(-1); }\r
+\r
+    req->radix[0] = req->radix[1] = req->radix[2] = NULL;\r
+\r
+    req->op    = IO_OP_READ;\r
+    req->root  = vdi->radix_root;\r
+    req->lock  = vdi->radix_lock;\r
+    req->vaddr = vaddr;\r
+    req->cb    = cb;\r
+    req->param = param;\r
+    req->state = READ_LOCKED;\r
+\r
+    /* read_cb() continues from READ_LOCKED once the row lock is granted */\r
+    block_rlock(req->lock, L1_IDX(vaddr), read_cb, req);\r
+\r
+    return 0;\r
+}\r
+\r
+\r
+/*\r
+ * async_write: start an asynchronous write of one block of a VDI.\r
+ *   @vdi:   VDI whose radix_root and radix_lock are used for the update\r
+ *   @vaddr: virtual block address to write\r
+ *   @block: data to store; the pointer is kept in the request and passed\r
+ *           to block_alloc()/block_write() later, so it must stay valid\r
+ *           until @cb runs\r
+ *   @cb:    called with the outcome once the request has finished\r
+ *   @param: opaque argument handed back to @cb\r
+ *\r
+ *   @return: 0 once the write lock has been requested, -1 if the request\r
+ *            record could not be allocated (in which case @cb is never\r
+ *            called)\r
+ */\r
+int async_write(vdi_t *vdi, u64 vaddr, char *block, \r
+                io_cb_t cb, void *param)\r
+{\r
+    struct io_req *req;\r
+\r
+    req = (struct io_req *)malloc(sizeof (struct io_req));\r
+    /* check the allocation before touching any field of the record */\r
+    if (req == NULL) {perror("req was NULL in async_write"); return(-1); }\r
+\r
+    req->radix[0] = req->radix[1] = req->radix[2] = NULL;\r
+    //DPRINTF("async_write\n");\r
+\r
+    req->op    = IO_OP_WRITE;\r
+    req->root  = vdi->radix_root;\r
+    req->lock  = vdi->radix_lock;\r
+    req->vaddr = vaddr;\r
+    req->block = block;\r
+    req->cb    = cb;\r
+    req->param = param;\r
+    req->radix_addr[L1] = getid(req->root); /* for consistency */\r
+    req->state = WRITE_LOCKED;\r
+\r
+    /* write_cb() continues from WRITE_LOCKED once the row lock is granted */\r
+    block_wlock(req->lock, L1_IDX(vaddr), write_cb, req);\r
+\r
+    return 0;\r
+}\r
+\r
+void read_cb(struct io_ret ret, void *param)\r
+{\r
+ struct io_req *req = (struct io_req *)param;\r
+ radix_tree_node node;\r
+ u64 idx;\r
+ char *block;\r
+ void *req_param;\r
+\r
+ DPRINTF("read_cb\n");\r
+ /* get record */\r
+ switch(req->state) {\r
+ \r
+ case READ_LOCKED: \r
+ \r
+ DPRINTF("READ_LOCKED\n");\r
+ req->state = READ_L1;\r
+ block_read(getid(req->root), read_cb, req); \r
+ break;\r
+ \r
+ case READ_L1: /* block is the radix root */\r
+\r
+ DPRINTF("READ_L1\n");\r
+ block = IO_BLOCK(ret);\r
+ if (block == NULL) goto fail;\r
+ node = (radix_tree_node) block;\r
+ idx = getid( node[L1_IDX(req->vaddr)] );\r
+ free(block);\r
+ if ( idx == ZERO ) {\r
+ req->state = RETURN_ZERO;\r
+ block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);\r
+ } else {\r
+ req->state = READ_L2;\r
+ block_read(idx, read_cb, req);\r
+ }\r
+ break;\r
+\r
+ case READ_L2:\r
+\r
+ DPRINTF("READ_L2\n");\r
+ block = IO_BLOCK(ret);\r
+ if (block == NULL) goto fail;\r
+ node = (radix_tree_node) block;\r
+ idx = getid( node[L2_IDX(req->vaddr)] );\r
+ free(block);\r
+ if ( idx == ZERO ) {\r
+ req->state = RETURN_ZERO;\r
+ block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);\r
+ } else {\r
+ req->state = READ_L3;\r
+ block_read(idx, read_cb, req);\r
+ }\r
+ break;\r
+\r
+ case READ_L3:\r
+ \r
+ DPRINTF("READ_L3\n");\r
+ block = IO_BLOCK(ret);\r
+ if (block == NULL) goto fail;\r
+ node = (radix_tree_node) block;\r
+ idx = getid( node[L3_IDX(req->vaddr)] );\r
+ free(block);\r
+ if ( idx == ZERO ) {\r
+ req->state = RETURN_ZERO;\r
+ block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);\r
+ } else {\r
+ req->state = READ_DATA;\r
+ block_read(idx, read_cb, req);\r
+ }\r
+ break;\r
+\r
+ case READ_DATA:\r
+ \r
+ DPRINTF("READ_DATA\n");\r
+ if (IO_BLOCK(ret) == NULL) goto fail;\r
+ req->retval = ret;\r
+ req->state = READ_UNLOCKED;\r
+ block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);\r
+ break;\r
+ \r
+ case READ_UNLOCKED:\r
+ {\r
+ struct io_ret r;\r
+ io_cb_t cb;\r
+ DPRINTF("READ_UNLOCKED\n");\r
+ req_param = req->param;\r
+ r = req->retval;\r
+ cb = req->cb;\r
+ free(req);\r
+ cb(r, req_param);\r
+ break;\r
+ }\r
+ \r
+ case RETURN_ZERO:\r
+ {\r
+ struct io_ret r;\r
+ io_cb_t cb;\r
+ DPRINTF("RETURN_ZERO\n");\r
+ req_param = req->param;\r
+ cb = req->cb;\r
+ free(req);\r
+ r.type = IO_BLOCK_T;\r
+ r.u.b = newblock();\r
+ cb(r, req_param);\r
+ break;\r
+ }\r
+ \r
+ default:\r
+ DPRINTF("*** Write: Bad state! (%d) ***\n", req->state);\r
+ goto fail;\r
+ }\r
+ \r
+ return;\r
+\r
+ fail:\r
+ {\r
+ struct io_ret r;\r
+ io_cb_t cb;\r
+ DPRINTF("asyn_read had a read error.\n");\r
+ req_param = req->param;\r
+ r = ret;\r
+ cb = req->cb;\r
+ free(req);\r
+ cb(r, req_param);\r
+ }\r
+\r
+\r
+}\r
+\r
+/*\r
+ * write_cb: continuation for async_write().\r
+ *   @r:     result of the operation that has just completed\r
+ *   @param: the struct io_req describing the request\r
+ *\r
+ * Each invocation handles the completion of the step named by req->state,\r
+ * issues the next step and returns.  The L1, L2 and L3 radix nodes are read\r
+ * in turn and kept in req->radix[].  At each level the entry for req->vaddr\r
+ * selects one of three continuations:\r
+ *   - entry is ZERO:      "zero" path, the missing lower nodes are created\r
+ *                         with newblock() and allocated bottom-up;\r
+ *   - entry not writable: "fault" path, the lower nodes are read, their\r
+ *                         writable flags are cleared and they are allocated\r
+ *                         afresh bottom-up;\r
+ *   - entry writable:     descend to the next level (or, at L3, overwrite\r
+ *                         the data block in place).\r
+ * Every path ends by writing the highest node that changed, releasing the\r
+ * row write lock and calling the caller's callback.\r
+ *\r
+ * A radix-node read that fails frees the nodes saved so far, releases the\r
+ * write lock and reports IO_INT_T / -1 to the caller's callback.\r
+ */\r
+void write_cb(struct io_ret r, void *param)\r
+{\r
+    struct io_req *req = (struct io_req *)param;\r
+    radix_tree_node node;\r
+    u64 a, addr;\r
+    void *req_param;\r
+\r
+    //DPRINTF("write_cb\n");\r
+    switch(req->state) {\r
+\r
+    case WRITE_LOCKED:\r
+\r
+        DPRINTF("WRITE_LOCKED (%llu)\n", L1_IDX(req->vaddr));\r
+        req->state = READ_L1;\r
+        block_read(getid(req->root), write_cb, req);\r
+        break;\r
+\r
+    case READ_L1: /* block is the radix root */\r
+\r
+        DPRINTF("READ_L1\n");\r
+        node = (radix_tree_node) IO_BLOCK(r);\r
+        if (node == NULL) goto fail;\r
+        a    = node[L1_IDX(req->vaddr)];\r
+        addr = getid(a);\r
+\r
+        req->radix_addr[L2] = addr;\r
+        req->radix[L1]      = node;\r
+\r
+        if ( addr == ZERO ) {\r
+            /* L1 empty subtree: */\r
+            req->state = ALLOC_DATA_L1z;\r
+            block_alloc( req->block, write_cb, req );\r
+        } else if ( !iswritable(a) ) {\r
+            /* L1 fault: */\r
+            req->state = READ_L2_L1f;\r
+            block_read( addr, write_cb, req );\r
+        } else {\r
+            req->state = READ_L2;\r
+            block_read( addr, write_cb, req );\r
+        }\r
+        break;\r
+\r
+    case READ_L2:\r
+\r
+        DPRINTF("READ_L2\n");\r
+        node = (radix_tree_node) IO_BLOCK(r);\r
+        if (node == NULL) goto fail;\r
+        a    = node[L2_IDX(req->vaddr)];\r
+        addr = getid(a);\r
+\r
+        req->radix_addr[L3] = addr;\r
+        req->radix[L2]      = node;\r
+\r
+        if ( addr == ZERO ) {\r
+            /* L2 empty subtree: */\r
+            req->state = ALLOC_DATA_L2z;\r
+            block_alloc( req->block, write_cb, req );\r
+        } else if ( !iswritable(a) ) {\r
+            /* L2 fault: */\r
+            req->state = READ_L3_L2f;\r
+            block_read( addr, write_cb, req );\r
+        } else {\r
+            req->state = READ_L3;\r
+            block_read( addr, write_cb, req );\r
+        }\r
+        break;\r
+\r
+    case READ_L3:\r
+\r
+        DPRINTF("READ_L3\n");\r
+        node = (radix_tree_node) IO_BLOCK(r);\r
+        if (node == NULL) goto fail;\r
+        a    = node[L3_IDX(req->vaddr)];\r
+        addr = getid(a);\r
+\r
+        req->radix[L3] = node;\r
+\r
+        if ( addr == ZERO ) {\r
+            /* L3 zero: no data block yet */\r
+            req->state = ALLOC_DATA_L3z;\r
+            block_alloc( req->block, write_cb, req );\r
+        } else if ( !iswritable(a) ) {\r
+            /* L3 fault: */\r
+            req->state = ALLOC_DATA_L3f;\r
+            block_alloc( req->block, write_cb, req );\r
+        } else {\r
+            /* data block is writable: overwrite it in place */\r
+            req->state = WRITE_DATA;\r
+            block_write( addr, req->block, write_cb, req );\r
+        }\r
+        break;\r
+\r
+    /* L3 Zero Path: */\r
+\r
+    case ALLOC_DATA_L3z:\r
+\r
+        DPRINTF("ALLOC_DATA_L3z\n");\r
+        addr = IO_ADDR(r);\r
+        a    = writable(addr);\r
+        req->radix[L3][L3_IDX(req->vaddr)] = a;\r
+        req->state = WRITE_L3_L3z;\r
+        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);\r
+        break;\r
+\r
+    /* L3 Fault Path: */\r
+\r
+    case ALLOC_DATA_L3f:\r
+\r
+        DPRINTF("ALLOC_DATA_L3f\n");\r
+        addr = IO_ADDR(r);\r
+        a    = writable(addr);\r
+        req->radix[L3][L3_IDX(req->vaddr)] = a;\r
+        req->state = WRITE_L3_L3f;\r
+        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);\r
+        break;\r
+\r
+    /* L2 Zero Path: */\r
+\r
+    case ALLOC_DATA_L2z:\r
+\r
+        DPRINTF("ALLOC_DATA_L2z\n");\r
+        addr = IO_ADDR(r);\r
+        a    = writable(addr);\r
+        req->radix[L3] = newblock();\r
+        req->radix[L3][L3_IDX(req->vaddr)] = a;\r
+        req->state = ALLOC_L3_L2z;\r
+        block_alloc( (char*)req->radix[L3], write_cb, req );\r
+        break;\r
+\r
+    case ALLOC_L3_L2z:\r
+\r
+        DPRINTF("ALLOC_L3_L2z\n");\r
+        addr = IO_ADDR(r);\r
+        a    = writable(addr);\r
+        req->radix[L2][L2_IDX(req->vaddr)] = a;\r
+        req->state = WRITE_L2_L2z;\r
+        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);\r
+        break;\r
+\r
+    /* L2 Fault Path: */\r
+\r
+    case READ_L3_L2f:\r
+\r
+        DPRINTF("READ_L3_L2f\n");\r
+        node = (radix_tree_node) IO_BLOCK(r);\r
+        /* test the read result before the node is touched */\r
+        if (node == NULL) goto fail;\r
+        clear_w_bits(node);\r
+\r
+        req->radix[L3] = node;\r
+        req->state = ALLOC_DATA_L2f;\r
+        block_alloc( req->block, write_cb, req );\r
+        break;\r
+\r
+    case ALLOC_DATA_L2f:\r
+\r
+        DPRINTF("ALLOC_DATA_L2f\n");\r
+        addr = IO_ADDR(r);\r
+        a    = writable(addr);\r
+        req->radix[L3][L3_IDX(req->vaddr)] = a;\r
+        req->state = ALLOC_L3_L2f;\r
+        block_alloc( (char*)req->radix[L3], write_cb, req );\r
+        break;\r
+\r
+    case ALLOC_L3_L2f:\r
+\r
+        DPRINTF("ALLOC_L3_L2f\n");\r
+        addr = IO_ADDR(r);\r
+        a    = writable(addr);\r
+        req->radix[L2][L2_IDX(req->vaddr)] = a;\r
+        req->state = WRITE_L2_L2f;\r
+        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);\r
+        break;\r
+\r
+    /* L1 Zero Path: */\r
+\r
+    case ALLOC_DATA_L1z:\r
+\r
+        DPRINTF("ALLOC_DATA_L1z\n");\r
+        addr = IO_ADDR(r);\r
+        a    = writable(addr);\r
+        req->radix[L3] = newblock();\r
+        req->radix[L3][L3_IDX(req->vaddr)] = a;\r
+        req->state = ALLOC_L3_L1z;\r
+        block_alloc( (char*)req->radix[L3], write_cb, req );\r
+        break;\r
+\r
+    case ALLOC_L3_L1z:\r
+\r
+        DPRINTF("ALLOC_L3_L1z\n");\r
+        addr = IO_ADDR(r);\r
+        a    = writable(addr);\r
+        req->radix[L2] = newblock();\r
+        req->radix[L2][L2_IDX(req->vaddr)] = a;\r
+        req->state = ALLOC_L2_L1z;\r
+        block_alloc( (char*)req->radix[L2], write_cb, req );\r
+        break;\r
+\r
+    case ALLOC_L2_L1z:\r
+\r
+        DPRINTF("ALLOC_L2_L1z\n");\r
+        addr = IO_ADDR(r);\r
+        a    = writable(addr);\r
+        req->radix[L1][L1_IDX(req->vaddr)] = a;\r
+        req->state = WRITE_L1_L1z;\r
+        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);\r
+        break;\r
+\r
+    /* L1 Fault Path: */\r
+\r
+    case READ_L2_L1f:\r
+\r
+        DPRINTF("READ_L2_L1f\n");\r
+        node = (radix_tree_node) IO_BLOCK(r);\r
+        /* test the read result before the node is touched */\r
+        if (node == NULL) goto fail;\r
+        clear_w_bits(node);\r
+        a    = node[L2_IDX(req->vaddr)];\r
+        addr = getid(a);\r
+\r
+        req->radix_addr[L3] = addr;\r
+        req->radix[L2]      = node;\r
+\r
+        if (addr == ZERO) {\r
+            /* nothing below L2, create an empty L3 and alloc data. */\r
+            /* (So skip READ_L3_L1f.) */\r
+            req->radix[L3] = newblock();\r
+            req->state = ALLOC_DATA_L1f;\r
+            block_alloc( req->block, write_cb, req );\r
+        } else {\r
+            req->state = READ_L3_L1f;\r
+            block_read( addr, write_cb, req );\r
+        }\r
+        break;\r
+\r
+    case READ_L3_L1f:\r
+\r
+        DPRINTF("READ_L3_L1f\n");\r
+        node = (radix_tree_node) IO_BLOCK(r);\r
+        /* test the read result before the node is touched */\r
+        if (node == NULL) goto fail;\r
+        clear_w_bits(node);\r
+\r
+        req->radix[L3] = node;\r
+        req->state = ALLOC_DATA_L1f;\r
+        block_alloc( req->block, write_cb, req );\r
+        break;\r
+\r
+    case ALLOC_DATA_L1f:\r
+\r
+        DPRINTF("ALLOC_DATA_L1f\n");\r
+        addr = IO_ADDR(r);\r
+        a    = writable(addr);\r
+        req->radix[L3][L3_IDX(req->vaddr)] = a;\r
+        req->state = ALLOC_L3_L1f;\r
+        block_alloc( (char*)req->radix[L3], write_cb, req );\r
+        break;\r
+\r
+    case ALLOC_L3_L1f:\r
+\r
+        DPRINTF("ALLOC_L3_L1f\n");\r
+        addr = IO_ADDR(r);\r
+        a    = writable(addr);\r
+        req->radix[L2][L2_IDX(req->vaddr)] = a;\r
+        req->state = ALLOC_L2_L1f;\r
+        block_alloc( (char*)req->radix[L2], write_cb, req );\r
+        break;\r
+\r
+    case ALLOC_L2_L1f:\r
+\r
+        DPRINTF("ALLOC_L2_L1f\n");\r
+        addr = IO_ADDR(r);\r
+        a    = writable(addr);\r
+        req->radix[L1][L1_IDX(req->vaddr)] = a;\r
+        req->state = WRITE_L1_L1f;\r
+        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);\r
+        break;\r
+\r
+    /* Final write of every path has completed: */\r
+\r
+    case WRITE_DATA:\r
+    case WRITE_L3_L3z:\r
+    case WRITE_L3_L3f:\r
+    case WRITE_L2_L2z:\r
+    case WRITE_L2_L2f:\r
+    case WRITE_L1_L1z:\r
+    case WRITE_L1_L1f:\r
+    {\r
+        int i;\r
+        DPRINTF("DONE\n");\r
+        /* free any saved node vals. */\r
+        for (i=0; i<3; i++)\r
+            if (req->radix[i] != 0) free(req->radix[i]);\r
+        /* keep the write result while the unlock completes */\r
+        req->retval = r;\r
+        req->state  = WRITE_UNLOCKED;\r
+        block_wunlock(req->lock, L1_IDX(req->vaddr), write_cb, req);\r
+        break;\r
+    }\r
+    case WRITE_UNLOCKED:\r
+    {\r
+        struct io_ret res;\r
+        io_cb_t cb;\r
+        DPRINTF("WRITE_UNLOCKED!\n");\r
+        /* copy what the callback needs before the record is freed */\r
+        req_param = req->param;\r
+        res       = req->retval;\r
+        cb        = req->cb;\r
+        free(req);\r
+        cb(res, req_param);\r
+        break;\r
+    }\r
+\r
+    default:\r
+        DPRINTF("*** Write: Bad state! (%d) ***\n", req->state);\r
+        goto bad_state;\r
+    }\r
+\r
+    return;\r
+\r
+ fail:\r
+    /* A radix-node read failed while the row write lock is held.  Free the\r
+     * nodes saved so far and release the lock; WRITE_UNLOCKED then reports\r
+     * the error stored in req->retval to the caller. */\r
+    {\r
+        int i;\r
+        DPRINTF("asyn_write had a read error mid-way.\n");\r
+        for (i=0; i<3; i++)\r
+            if (req->radix[i] != NULL) free(req->radix[i]);\r
+        req->retval.type = IO_INT_T;\r
+        req->retval.u.i  = -1;\r
+        req->state       = WRITE_UNLOCKED;\r
+        block_wunlock(req->lock, L1_IDX(req->vaddr), write_cb, req);\r
+        return;\r
+    }\r
+\r
+ bad_state:\r
+    /* Unknown state: report straight to the caller without touching the\r
+     * lock, since it cannot be told whether it is held. */\r
+    {\r
+        struct io_ret res;\r
+        io_cb_t cb;\r
+        req_param = req->param;\r
+        cb        = req->cb;\r
+        res.type  = IO_INT_T;\r
+        res.u.i   = -1;\r
+        free(req);\r
+        cb(res, req_param);\r
+    }\r
+}\r
+\r
--- /dev/null
+#ifndef _REQUESTSASYNC_H_\r
+#define _REQUESTSASYNC_H_\r
+\r
+#include "block-async.h"\r
+#include "blockstore.h" /* for newblock etc. */\r
+\r
+/*\r
+#define BLOCK_SIZE 4096\r
+#define ZERO 0ULL\r
+#define getid(x) (((x)>>1)&0x7fffffffffffffffLLU)\r
+#define iswritable(x) (((x) & 1LLU) != 0)\r
+#define writable(x) (((x) << 1) | 1LLU)\r
+#define readonly(x) ((u64)((x) << 1))\r
+*/\r
+\r
+/* Start an asynchronous read of virtual block vaddr of vdi.  The outcome\r
+ * is delivered later through cb(result, param).  Returns 0 when the\r
+ * request was started, -1 if its request record could not be allocated. */\r
+int async_read (vdi_t *vdi, u64 vaddr, io_cb_t cb, void *param);\r
+/* Start an asynchronous write of block to virtual block vaddr of vdi.  The\r
+ * outcome is delivered later through cb(result, param).  Returns 0 when\r
+ * the request was started, -1 if its request record could not be\r
+ * allocated. */\r
+int async_write(vdi_t *vdi, u64 vaddr, char *block, io_cb_t cb, void *param);\r
+ \r
+#endif //_REQUESTSASYNC_H_\r
#include <fcntl.h>
#include <string.h>
#include <sys/time.h>
+#include <pthread.h>
#include "blockstore.h"
+#include "block-async.h"
#include "radix.h"
#include "vdi.h"
#define VDI_REG_BLOCK 2LL
#define VDI_RADIX_ROOT writable(3)
-#if 1
+#if 0
#define DPRINTF(_f, _a...) printf ( _f , ## _a )
#else
#define DPRINTF(_f, _a...) ((void)0)
return vdi_reg;
}
+
vdi_t *vdi_create(snap_id_t *parent_snap, char *name)
{
int ret;
vdi->id = vdi_reg->nr_vdis++;
strncpy(vdi->name, name, VDI_NAME_SZ);
vdi->name[VDI_NAME_SZ] = '\0';
+ vdi->radix_lock = NULL; /* for tidiness */
writeblock(vdi->block, (void *)vdi);
update(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi->id, vdi->block);
writeblock(VDI_REG_BLOCK, (void *)vdi_reg);
freeblock(vdi_reg);
+ vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
+ if (vdi->radix_lock == NULL)
+ {
+ perror("couldn't malloc radix_lock for new vdi!");
+ freeblock(vdi);
+ return NULL;
+ }
+ radix_lock_init(vdi->radix_lock);
+
return vdi;
}
return NULL;
vdi = (vdi_t *)readblock(vdi_blk);
+
+ vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
+ if (vdi->radix_lock == NULL)
+ {
+ perror("couldn't malloc radix_lock for new vdi!");
+ freeblock(vdi);
+ return NULL;
+ }
+ radix_lock_init(vdi->radix_lock);
+
return vdi;
}
+#ifndef _VDI_H_
+#define _VDI_H_
/**************************************************************************
*
* vdi.h
#include "blktaplib.h"
#include "snaplog.h"
-#define VDI_HEIGHT 35
-#define VDI_REG_HEIGHT 35 /* why not? */
+#define VDI_HEIGHT 27 /* Note that these are now hard-coded */
+#define VDI_REG_HEIGHT 27 /* in the async lookup code */
#define VDI_NAME_SZ 256
+
typedef struct vdi {
u64 id; /* unique vdi id -- used by the registry */
u64 block; /* block where this vdi lives (also unique)*/
snap_id_t snap; /* next snapshot slot for this VDI */
struct vdi *next; /* used to hash-chain in blkif. */
blkif_vdev_t vdevice; /* currently mounted as... */
+ struct radix_lock *radix_lock;/* per-line L1 RW lock for parallel reqs */
char name[VDI_NAME_SZ];/* human readable vdi name */
} vdi_t;
#endif /* __VDI_H__ */
+
+#endif //_VDI_H_